Python imaplib - get_filename() 在附件包含 UTF-8 字符时不起作用
Python imaplib - get_filename() not working when attachment has UTF-8 characters
我有这个函数,可以使用 imaplib 下载指定电子邮件中的所有附件:
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir, markRead):
    """Save every attachment of one message into ``outputdir``.

    Args:
        m: Object whose ``uid(command, *args)`` method is used to FETCH the
            message and STORE the flag (presumably an ``imaplib`` connection).
        emailid: UID of the message to fetch.
        outputdir: Directory the attachment files are written to.
        markRead: When true, the message is flagged as seen afterwards.

    A message whose top-level content type is not multipart is skipped
    entirely: nothing is written and no flag is stored.
    """
    resp, data = m.uid("FETCH", emailid, "(BODY.PEEK[])")
    email_body = data[0][1]
    mail = email.message_from_bytes(email_body)
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
            # NOTE: get_filename() hands back the header value as transmitted,
            # so an RFC 2047 encoded name ("=?UTF-8?B?...?=") is used verbatim
            # as the file name here.
            with open(outputdir + '/' + part.get_filename(), 'wb') as attachment:
                attachment.write(part.get_payload(decode=True))
    if markRead:
        # Raw string: the flag is literally "(\Seen)"; "\S" is not a valid escape.
        m.uid("STORE", emailid, "+FLAGS", r"(\Seen)")
问题是:当我尝试下载文件名中包含 UTF-8 字符的附件时,它无法正常工作。我收到了下面的错误,我猜这是因为 part.get_filename() 没有正确解码文件名:
OSError: [Errno 22] Invalid argument: './temp//=?UTF-8?B?QkQgUmVsYXTDs3JpbyAywqogRmFzZS5kb2M=?=\r\n\t=?UTF-8?B?eA==?='
我该怎么做才能解决这个问题?
我找到了解决方案
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir, markRead):
    """Save every attachment of one message into ``outputdir``.

    Attachment names are decoded from their RFC 2047 encoded-word form
    (``=?charset?B|Q?data?=``) before being used as file names.

    Args:
        m: Object whose ``uid(command, *args)`` method is used to FETCH the
            message and STORE the flag (presumably an ``imaplib`` connection).
        emailid: UID of the message to fetch.
        outputdir: Directory the attachment files are written to.
        markRead: When true, the message is flagged as seen afterwards.

    A message whose top-level content type is not multipart is skipped
    entirely: nothing is written and no flag is stored.
    """
    import os  # local import keeps this snippet self-contained

    resp, data = m.uid("FETCH", emailid, "(BODY.PEEK[])")
    email_body = data[0][1]
    mail = email.message_from_bytes(email_body)
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
            continue
        raw_name = part.get_filename()
        if not raw_name:
            # A disposition without a filename gives nothing to name the file.
            continue
        # decode_header() may return several chunks (plain text mixed with
        # encoded words); all of them belong to the name, not just the first.
        pieces = []
        for chunk, charset in decode_header(raw_name):
            if isinstance(chunk, bytes):
                try:
                    chunk = chunk.decode(charset or 'ascii')
                except (LookupError, UnicodeDecodeError):
                    chunk = chunk.decode('utf-8', errors='replace')
            pieces.append(chunk)
        # The name comes from the sender: drop any directory components so
        # the file cannot be written outside outputdir.
        filename = os.path.basename(''.join(pieces).replace('\\', '/'))
        if filename in ('', '.', '..'):
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        with open(os.path.join(outputdir, filename), 'wb') as attachment:
            attachment.write(payload)
    if markRead:
        # Raw string: the flag is literally "(\Seen)"; "\S" is not a valid escape.
        m.uid("STORE", emailid, "+FLAGS", r"(\Seen)")
这是一个老问题,但我遇到了这个问题并且很难找到解决方案...也许这可以帮助其他人!
编辑:这仅涵盖将文件名“解码”为正确文件名的部分!
import re
import base64
import quopri
def encoded_words_to_text(encoded_words):
    """Decode RFC 2047 encoded words (``=?charset?B|Q?data?=``) into text.

    Every encoded word found in ``encoded_words`` is decoded; text around
    the encoded words is kept, and whitespace that only separates two
    encoded words (such as header folding) is dropped.

    Args:
        encoded_words: Header value that may contain encoded words.

    Returns:
        The decoded text. The input is returned unchanged when it is not a
        string, or when a word cannot be decoded (malformed data, unknown
        charset).
    """
    word_pattern = r'=\?([^?\s]+)\?([BbQq])\?([^?\s]*)\?='
    try:
        pieces = []
        cursor = 0
        after_word = False
        for match in re.finditer(word_pattern, encoded_words):
            gap = encoded_words[cursor:match.start()]
            # Whitespace between two encoded words is a separator only.
            if gap and not (after_word and gap.isspace()):
                pieces.append(gap)
            charset, encoding, encoded_text = match.groups()
            if encoding.upper() == 'B':
                # Tolerate senders that strip the base64 padding.
                padded = encoded_text + '=' * (-len(encoded_text) % 4)
                byte_string = base64.b64decode(padded)
            else:
                # header=True makes "_" decode to a space, as "Q" requires.
                byte_string = quopri.decodestring(encoded_text, header=True)
            pieces.append(byte_string.decode(charset))
            cursor = match.end()
            after_word = True
        pieces.append(encoded_words[cursor:])
        return ''.join(pieces)
    except (TypeError, ValueError, LookupError):
        # Best effort: hand the caller back what it passed in.
        return encoded_words
结果:
# Example session: decode a single base64 ("B") encoded word.
test_string = '=?utf-8?B?SUJUIFB1cmNoYXNlIE9yZGVyLnBkZg==?='
encoded_words_to_text(test_string)
# Value displayed by the interactive interpreter for the call above:
'IBT Purchase Order.pdf'
我有这个函数,可以使用 imaplib 下载指定电子邮件中的所有附件:
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir, markRead):
    """Save every attachment of one message into ``outputdir``.

    Args:
        m: Object whose ``uid(command, *args)`` method is used to FETCH the
            message and STORE the flag (presumably an ``imaplib`` connection).
        emailid: UID of the message to fetch.
        outputdir: Directory the attachment files are written to.
        markRead: When true, the message is flagged as seen afterwards.

    A message whose top-level content type is not multipart is skipped
    entirely: nothing is written and no flag is stored.
    """
    resp, data = m.uid("FETCH", emailid, "(BODY.PEEK[])")
    email_body = data[0][1]
    mail = email.message_from_bytes(email_body)
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
            # NOTE: get_filename() hands back the header value as transmitted,
            # so an RFC 2047 encoded name ("=?UTF-8?B?...?=") is used verbatim
            # as the file name here.
            with open(outputdir + '/' + part.get_filename(), 'wb') as attachment:
                attachment.write(part.get_payload(decode=True))
    if markRead:
        # Raw string: the flag is literally "(\Seen)"; "\S" is not a valid escape.
        m.uid("STORE", emailid, "+FLAGS", r"(\Seen)")
问题是:当我尝试下载文件名中包含 UTF-8 字符的附件时,它无法正常工作。我收到了下面的错误,我猜这是因为 part.get_filename() 没有正确解码文件名:
OSError: [Errno 22] Invalid argument: './temp//=?UTF-8?B?QkQgUmVsYXTDs3JpbyAywqogRmFzZS5kb2M=?=\r\n\t=?UTF-8?B?eA==?='
我该怎么做才能解决这个问题?
我找到了解决方案
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir, markRead):
    """Save every attachment of one message into ``outputdir``.

    Attachment names are decoded from their RFC 2047 encoded-word form
    (``=?charset?B|Q?data?=``) before being used as file names.

    Args:
        m: Object whose ``uid(command, *args)`` method is used to FETCH the
            message and STORE the flag (presumably an ``imaplib`` connection).
        emailid: UID of the message to fetch.
        outputdir: Directory the attachment files are written to.
        markRead: When true, the message is flagged as seen afterwards.

    A message whose top-level content type is not multipart is skipped
    entirely: nothing is written and no flag is stored.
    """
    import os  # local import keeps this snippet self-contained

    resp, data = m.uid("FETCH", emailid, "(BODY.PEEK[])")
    email_body = data[0][1]
    mail = email.message_from_bytes(email_body)
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
            continue
        raw_name = part.get_filename()
        if not raw_name:
            # A disposition without a filename gives nothing to name the file.
            continue
        # decode_header() may return several chunks (plain text mixed with
        # encoded words); all of them belong to the name, not just the first.
        pieces = []
        for chunk, charset in decode_header(raw_name):
            if isinstance(chunk, bytes):
                try:
                    chunk = chunk.decode(charset or 'ascii')
                except (LookupError, UnicodeDecodeError):
                    chunk = chunk.decode('utf-8', errors='replace')
            pieces.append(chunk)
        # The name comes from the sender: drop any directory components so
        # the file cannot be written outside outputdir.
        filename = os.path.basename(''.join(pieces).replace('\\', '/'))
        if filename in ('', '.', '..'):
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        with open(os.path.join(outputdir, filename), 'wb') as attachment:
            attachment.write(payload)
    if markRead:
        # Raw string: the flag is literally "(\Seen)"; "\S" is not a valid escape.
        m.uid("STORE", emailid, "+FLAGS", r"(\Seen)")
这是一个老问题,但我遇到了这个问题并且很难找到解决方案...也许这可以帮助其他人!
编辑:这仅涵盖将文件名“解码”为正确文件名的部分!
import re
import base64
import quopri
def encoded_words_to_text(encoded_words):
    """Decode RFC 2047 encoded words (``=?charset?B|Q?data?=``) into text.

    Every encoded word found in ``encoded_words`` is decoded; text around
    the encoded words is kept, and whitespace that only separates two
    encoded words (such as header folding) is dropped.

    Args:
        encoded_words: Header value that may contain encoded words.

    Returns:
        The decoded text. The input is returned unchanged when it is not a
        string, or when a word cannot be decoded (malformed data, unknown
        charset).
    """
    word_pattern = r'=\?([^?\s]+)\?([BbQq])\?([^?\s]*)\?='
    try:
        pieces = []
        cursor = 0
        after_word = False
        for match in re.finditer(word_pattern, encoded_words):
            gap = encoded_words[cursor:match.start()]
            # Whitespace between two encoded words is a separator only.
            if gap and not (after_word and gap.isspace()):
                pieces.append(gap)
            charset, encoding, encoded_text = match.groups()
            if encoding.upper() == 'B':
                # Tolerate senders that strip the base64 padding.
                padded = encoded_text + '=' * (-len(encoded_text) % 4)
                byte_string = base64.b64decode(padded)
            else:
                # header=True makes "_" decode to a space, as "Q" requires.
                byte_string = quopri.decodestring(encoded_text, header=True)
            pieces.append(byte_string.decode(charset))
            cursor = match.end()
            after_word = True
        pieces.append(encoded_words[cursor:])
        return ''.join(pieces)
    except (TypeError, ValueError, LookupError):
        # Best effort: hand the caller back what it passed in.
        return encoded_words
结果:
# Example session: decode a single base64 ("B") encoded word.
test_string = '=?utf-8?B?SUJUIFB1cmNoYXNlIE9yZGVyLnBkZg==?='
encoded_words_to_text(test_string)
# Value displayed by the interactive interpreter for the call above:
'IBT Purchase Order.pdf'