使用 TIdIMAP4.UIDRetrieveTextPeek2() 检索电子邮件的消息正文时出现错误字符

I get wrong characters when retrieving the message body of an email using TIdIMAP4.UIDRetrieveTextPeek2()

在我的应用程序(Windows 10,Delphi 10.4)中,我通过 TIdIMAP4 用以下代码从服务器检索电子邮件正文:

// Inline variable declaration: the string that receives the message text, initially empty.
var aBody : string := '';
// Called on the TIdIMAP4 instance; MsgID is the UID of the message whose text body
// is fetched into aBody.
UIDRetrieveTextPeek2(MsgID,aBody);

如果返回的字符串以<!DOCTYPE html>'#$D#$A'<html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:o="urn:schemas-microsoft-com:...

或者以下面的内容开头:

<html><head><style type="text/css">'#$D#$A'@media screen and (max-width:480px) {'#$D#$A' .background_inner {'#$D#$A' padding: 0!important;'#$D#$A'....

我可以看到 HTML 内容的格式正确,但是当它以以下内容开头时:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.='#$D#$A'w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">=0A<html xmlns=3D"http://www.='#$D#$A'w3.org/1999/xhtml"><head><style type=3D"text/css" media=3D"all">=0A=09a:hov='#$D#$A'er {=09color: red;=09}=0A=09a {=0A=09=09text-decoration: underline;=0A=09='#$D#$A'=09color: #0088cc;=0A=09}....

HTML 格式不正确 (=0A=09a:hov='#$D#$A'er {=09color: red;=09}=0A=09a {=0A=09=09text-decoration: underline;=0A=09=)

我该如何解决?

更新

这是一个可重现的例子:

unit Unit11;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls,
  IdIMAP4,IdMessage,IdSSLOpenSSL,IdExplicitTLSClientServerBase, IdBaseComponent, IdIntercept, IdLogBase, IdLogFile;

type
  // Form used by the reproducible example. It holds a single button and
  // declares Button1Click, which performs the IMAP retrieval
  // (presumably wired to Button1.OnClick in the .dfm - verify there).
  TForm11 = class(TForm)
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form11: TForm11;

implementation

{$R *.dfm}

// Reproduces the problem: connects to imap.gmail.com over implicit TLS,
// searches INBOX for all messages and, for the message whose UID is '16805',
// fetches its text body with UIDRetrieveTextPeek2 while logging the IMAP
// traffic to 'log.txt'.
//
// Sender is not used.
//
// Compared to the first version of this repro, every object created here is
// released on all paths (including exceptions) and the connection is closed.
procedure TForm11.Button1Click(Sender: TObject);
var
    IMAP: TIdIMAP4;
    SSLHandler: TIdSSLIOHandlerSocketOpenSSL;
    LogFile: TIdLogFile;
    SearchInfo: array of TIdIMAP4SearchRec;
    i: Integer;
    MsgID, aBody: string;
begin
    IMAP := TIdIMAP4.Create(nil);
    try
        // The handler is owned by IMAP, so it is destroyed together with it
        // and never freed through an uninitialised local.
        SSLHandler := TIdSSLIOHandlerSocketOpenSSL.Create(IMAP);
        SSLHandler.SSLOptions.Method := sslvSSLv23;
        // The IOHandler must be assigned before UseTLS is set.
        IMAP.IOHandler := SSLHandler;
        IMAP.AuthType := iatUserPass;
        IMAP.Host := 'imap.gmail.com';
        IMAP.Username := 'xxxxxxxxxx';
        IMAP.Password := 'yyyyyyyyyy';
        IMAP.UseTLS := utUseImplicitTLS;

        if not IMAP.Connect(True) then
            Exit;
        try
            IMAP.SelectMailBox('INBOX');

            SetLength(SearchInfo, 1);
            SearchInfo[0].SearchKey := skAll;
            if not IMAP.SearchMailBox(SearchInfo) then
                Exit;

            // An empty result has High(...) = -1, so the loop body is skipped.
            for i := 0 to High(IMAP.MailBox.SearchResult) do
            begin
                MsgID := '';
                IMAP.GetUID(IMAP.MailBox.SearchResult[i], MsgID);
                // Some bodies are unreadable, some are fine, and some (like
                // this one) come back as undecoded quoted-printable.
                if MsgID = '16805' then
                begin
                    LogFile := TIdLogFile.Create(nil);
                    try
                        LogFile.Filename := 'log.txt';
                        IMAP.Intercept := LogFile;
                        LogFile.Active := True;
                        try
                            // aBody receives the retrieved text; inspect it
                            // in the debugger to see the problem.
                            IMAP.UIDRetrieveTextPeek2(MsgID, aBody);
                        finally
                            LogFile.Active := False;
                            // Detach before freeing so the connection does
                            // not keep a reference to a destroyed intercept.
                            IMAP.Intercept := nil;
                        end;
                    finally
                        LogFile.Free;
                    end;
                end;
            end;
        finally
            IMAP.Disconnect;
        end;
    finally
        IMAP.Free;
    end;
end;

end.

及其捕获的日志:

Sent 26/7/2021 10:09:45 ??: C55 UID FETCH 16805 (BODYSTRUCTURE)<EOL>
Recv 26/7/2021 10:09:45 ??: * 51 FETCH (UID 16805 BODYSTRUCTURE (("TEXT" "PLAIN" ("CHARSET" "utf-8") NIL NIL "8BIT" 760 16 NIL NIL NIL)("TEXT" "HTML" ("CHARSET" "utf-8") NIL NIL "QUOTED-PRINTABLE" 3962 80 NIL NIL NIL) "ALTERNATIVE" ("BOUNDARY" "----=_NextPart_000_0008_01D583AF.54009F20") NIL NIL))<EOL>
Recv 26/7/2021 10:09:45 ??: C55 OK Success<EOL>
Sent 26/7/2021 10:09:45 ??: C56 UID FETCH 16805 (BODY.PEEK[2])<EOL>
Recv 26/7/2021 10:09:46 ??: * 51 FETCH (UID 16805 BODY[2] 
Recv 26/7/2021 10:09:46 ??: {3962}<EOL>
Recv 26/7/2021 10:09:46 ??: <html><EOL>    <head><EOL>  <style type=3D"text/css"><EOL>            body, td, span, p, th { font-size: 11px; }<EOL>       table.html-email {margin:10px auto;background:#fff;border:solid =<EOL>#dad8d8 1px;}<EOL>        .html-email tr{border-bottom : 1px solid #eee;}<EOL>        span.grey {color:#666;}<EOL>        span.date {color:#666;font-size: 10px;}<EOL>        a.default:link, a.default:hover, a.default:visited =<EOL>{color:#666;line-height:25px;background: #f2f2f2;margin: 10px ;padding: =<EOL>3px 8px 1px 8px;border: solid #CAC9C9 1px;border-radius: =<EOL>4px;-webkit-border-radius: 4px;-moz-border-radius: 4px;text-shadow: 1px =<EOL>1px 1px #f2f2f2;font-size: 12px;background-position: 0px 0px;display: =<EOL>inline-block;text-decoration: none;}<EOL>       a.default:hover {color:#888;background: #f8f8f8;}<EOL>      .cart-summary{ }<EOL>       .html-email th { background: #ccc;margin: 0px;padding: 10px;}<EOL>      .sectiontableentry2, .html-email th, .cart-summary th{ background: =<EOL>#ccc;margin: 0px;padding: 10px;}<EOL>      .sectiontableentry1, .html-email td, .cart-summary td {background: =<EOL>#fff;margin: 0px;padding: 10px;}<EOL>  </style><EOL><EOL>    </head><EOL><EOL>    <body style=3D"background: #F2F2F2;word-wrap: break-word;"><EOL> <div style=3D"background-color: #e6e6e6;" width=3D"100%"><EOL>      <table style=3D"margin: auto;" cellpadding=3D"0" cellspacing=3D"0"  =<EOL>><EOL>        <tr><EOL>           <td><EOL>           <table width=3D"100%" border=3D"0" cellpadding=3D"0" =<EOL>cellspacing=3D"0" class=3D"html-email"><EOL>             <tr><EOL>               <td ><EOL><EOL>                 =CE=9A=CE=B1=CE=BB=CF=8E=CF=82 =<EOL>=CE=AE=CF=81=CE=B8=CE=B1=CF=84=CE=B5 =CF=83=CF=84=CE=BF =<EOL>=CE=91=CE=A1=CE=93=CE=A5=CE=A1=CE=A9 =<EOL>=CE=A0=CE=91=CE=A0=CE=91=CE=93=CE=95=CE=A9=CE=A1=CE=93=CE=99=CE=9F=CE=A5  =<EOL>              <br /><EOL>                                 </td><EOL>              </tr><EOL>          
</table><EOL><EOL>          <table class=3D"html-email" cellspacing=3D"0" cellpadding=3D"0" =<EOL>border=3D"0" width=3D"100%"><EOL>             <tr><EOL>               <th width=3D"100%"><EOL>                    =CE=A4=CE=B1 =CF=83=CF=84=CE=BF=CE=B9=CF=87=CE=B5=CE=AF=CE=B1 =<EOL>=CF=84=CE=B7=CF=82 =CE=B5=CE=B3=CE=B3=CF=81=CE=B1=CF=86=CE=AE=CF=82 =<EOL>=CF=83=CE=B1=CF=82                </th><EOL><EOL>             </tr><EOL>              <tr><EOL>               <td valign=3D"top" width=3D"100%"><EOL>                 =CE=8C=CE=BD=CE=BF=CE=BC=CE=B1 =<EOL>=CF=83=CF=8D=CE=BD=CE=B4=CE=B5=CF=83=CE=B7=CF=82dpap<br />=CE=A4=CE=BF =<EOL>=CF=8C=CE=BD=CE=BF=CE=BC=CE=B1 =CF=80=CE=BF=CF=85 =<EOL>=CE=B5=CE=BC=CF=86=CE=B1=CE=BD=CE=AF=CE=B6=CE=B5=CF=84=CE=B1=CE=B9: =<EOL>=CE=91=CE=A1=CE=93=CE=A5=CE=A1=CE=A9 =<EOL>=CE=A0=CE=91=CE=A0=CE=91=CE=93=CE=95=CE=A9=CE=A1=CE=93=CE=99=CE=9F=CE=A5<=<EOL>br />O =CE=BA=CF=89=CE=B4=CE=B9=CE=BA=CF=8C=CF=82 =<EOL>=CF=83=CE=B1=CF=82staran<br /><br />=CE=97 =<EOL>=CE=B4=CE=B9=CE=B5=CF=8D=CE=B8=CF=85=CE=BD=CF=83=CE=B7 =<EOL>=CF=83=CE=B1=CF=82: <br />E-Mail: dpapdpap@gmail.com<br =<EOL>/>=CE=A0=CF=81=CE=BF=CE=B2=CE=B1=CE=BB=CE=BB=CF=8C=CE=BC=CE=B5=CE=BD=CE=BF=<EOL> =CF=8C=CE=BD=CE=BF=CE=BC=CE=B1: =CE=91=CE=A1=CE=93=CE=A5=CE=A1=CE=A9 =<EOL>=CE=A0=CE=91=CE=A0=CE=91=CE=93=CE=95=CE=A9=CE=A1=CE=93=CE=99=CE=9F=CE=A5<=<EOL>br />=CE=9F=CE=BD=CE=BF=CE=BC=CE=B1 =<EOL>=CE=B5=CF=84=CE=B1=CE=B9=CF=81=CE=AF=CE=B1=CF=82: =<EOL>=CE=91.=CE=94.=CE=A0=CE=91=CE=A0=CE=91=CE=93=CE=95=CE=A9=CE=A1=CE=93=CE=99=<EOL>=CE=9F=CE=A5<br />=CE=9F=CE=BD=CE=BF=CE=BC=CE=B1: =<EOL>=CE=91=CE=A1=CE=93=CE=A5=CE=A1=CE=A9<br =<EOL>/>=CE=95=CF=80=CE=AF=CE=B8=CE=B5=CF=84=CE=BF: =<EOL>=CE=A0=CE=91=CE=A0=CE=91=CE=93=CE=95=CE=A9=CE=A1=CE=93=CE=99=CE=9F=CE=A5<=<EOL>br />=CE=94=CE=B9=CE=B5=CF=8D=CE=B8=CF=85=CE=BD=CF=83=CE=B7 1: =<EOL>=CE=A6=CE=95=CE=99=CE=94=CE=99=CE=A0=CE=A0=CE=99=CE=94=CE=9F=CE=A5 2<br =<EOL>/>=CE=A4=CE=B1=CF=87. 
=CE=BA=CF=89=CE=B4=CE=B9=CE=BA=CF=8C=CF=82: =<EOL>32131<br />=CE=A0=CF=8C=CE=BB=CE=B7: =<EOL>=CE=9B=CE=99=CE=92=CE=91=CE=94=CE=95=CE=99=CE=91<br =<EOL>/>=CE=A7=CF=8E=CF=81=CE=B1: Greece<br />=CE=9D=CE=BF=CE=BC=CF=8C=CF=82 / =<EOL>=CE=A0=CE=B5=CF=81=CE=B9=CE=BF=CF=87=CE=AE: =<EOL>=CE=92=CE=9F=CE=99=CE=A9=CE=A4=CE=99=CE=91=CE=A3<br =<EOL>/>=CE=A4=CE=B7=CE=BB.: 2261089120<br />=CE=BA=CE=B9=CE=BD.: =<EOL>6974398860<br />               </td><EOL>              </tr><EOL>          </table><EOL>           </td><EOL>      </tr><EOL>      </table><EOL>   </div><EOL>    </body><EOL></html><EOL><EOL>
Recv 26/7/2021 10:09:46 ??: )<EOL>
Recv 26/7/2021 10:09:46 ??: C56 OK Success<EOL>

TIdIMAP4.UIDRetrieveTextPeek2() 首先检索电子邮件的正文结构,然后扫描该结构,查找第一个报告大小不为零的文本部分。如果没有找到,则使用最后一个文本部分。然后它使用所选部分指定的传输编码和字符集来解码该部分的文本并输出。

至少,理论上是这样。

在您的日志中,有问题的电子邮件有 2 个文本部分被 IMAP 服务器报告:

  • text/plain,大小 760,编码 "8BIT",字符集 "utf-8"
  • text/html,大小 3962,编码 "QUOTED-PRINTABLE",字符集 "utf-8"

但是,在评论中,您说 UIDRetrieveStructure()(UIDRetrieveTextPeek2() 在内部使用它)实际上报告了 3 个文本部分:

  • multipart/alternative
  • text/plain,大小 760,编码 "8BIT",字符集 "utf-8"
  • text/html,大小 3962,编码 "QUOTED-PRINTABLE",字符集 "utf-8"

您的日志显示 UIDRetrieveTextPeek2() 正在检索 BODY.PEEK[2],因此它认为自己正在请求 text/plain 部分的内容(这是合理的,因为那是第一个非空文本部分),但实际上请求的是 text/html 部分的内容。这个问题必须修复。我已经为此提交了一个问题单:

#368 TIdIMAP4.InternalRetrieveText() does not retreive text correctly

由于 text/plain 部分的传输编码是 8bit,UIDRetrieveTextPeek2() 没有尝试解码 HTML 中的 QP(quoted-printable)编码字符,这解释了为什么您在输出的 string 中看到它们(=0A、=3D、=09 等)。