如何使用 python IMAP 提取多部分电子邮件的正文并保存附件?
How to extract the body of an multipart email and save the attachments using python IMAP?
我正在做一个项目,我会收到带有特定 'subject' 的电子邮件。有网友转发给我。正文由文本组成,但在原始电子邮件中,并且没有在转发行上方输入新文本。电子邮件的任何一部分也有附件。
我使用 python 和 IMAP 编写了以下代码,并且只有当电子邮件是新的而不是转发的电子邮件时才能存储附件和正文。
def getAllEmails(username, password, subject, fromEmail, folderName):
mail = imaplib.IMAP4_SSL("imap.outlook.com")
mail.login(username, password)
print("Login success..........")
mail.select("inbox")
result, data = mail.search(None, 'SUBJECT', '"{}"'.format(subject))
inbox_item_list_subject = data[0].split()
result, data = mail.search(None, 'FROM', '"{}"'.format(fromEmail))
inbox_item_list_sender = data[0].split()
inbox_item_list = list(set(inbox_item_list_subject) & set(inbox_item_list_sender))
counter = 0
for item in inbox_item_list:
counter+=1
result2, email_data = mail.fetch(item,'(RFC822)')
raw_email = email_data[0][1].decode("utf-8")
email_message = email.message_from_string(raw_email)
#getting information about the mail like to, from,subject, date.
to_ = email_message['To']
from_ = email_message['From']
subject_ = email_message['Subject']
date_ = email_message['date']
# setting the format to save in text file.
to_ = "to: "
from_ = "from: " + from_ + str("\n")
date_ = "date: " + date_ + str("\n")
subject__ = "subject: " + subject_ + str("\n")
# accessing the subparts of email_message
for part in email_message.walk():
if part.get_content_maintype == 'multipart':
continue
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
filename = part.get_filename()
ext = mimetypes.guess_extension(part.get_content_type())
# allowing pdf, jpg, png and doc format only
if ext == '.pdf' or ext == '.csv' or ext == '.png' or ext == '.docx' or ext == '.xlsx':
if filename:
save_path = os.path.join(os.getcwd(), folderName, subject_)
if not os.path.exists(save_path):
os.makedirs(save_path)
with open(os.path.join(save_path, filename), 'wb') as fp:
fp.write(part.get_payload(decode=True))
fp.close()
# getting the body part of the mail.
try:
body = part.get_payload(decode=True).decode()
except:
pass
# saving the required information in a file named as "textfile.txt".
if content_type == "text/plain" and "attachment" not in content_disposition:
save_path = os.path.join(os.getcwd(), folderName, subject_)
if not os.path.exists(save_path):
os.makedirs(save_path)
filename = "textfile.txt"
with open(os.path.join(save_path, filename), 'w+', encoding='utf-8') as fp:
fp.writelines(to_)
fp.writelines(from_)
fp.writelines(date_)
fp.writelines(subject__)
fp.writelines(body)
fp.close()
mail.close()
mail.logout()
即使是转发电子邮件,我也希望存储正文和附件??
似乎您已经有了提取附件的部分。
尝试使用此代码检索多部分电子邮件的正文。
您可能需要弄清楚如何将您的部分与此部分合并。
def getAll(username, password, folderName):
mail = imaplib.IMAP4_SSL("imap.outlook.com")
mail.login(username, password)
print("Login success..........")
mail.select("INBOX")
result, data = mail.search(None, '(FROM "user@gmail.com" SUBJECT "Subject-Name")')
for num in data[0].split():
h, d = mail.fetch(num, '(RFC822)')
raw_email = d[0][1].decode("utf-8")
message = email.message_from_string(raw_email)
email_from = str(make_header(decode_header(message['From'])))
subject = str(make_header(decode_header(message['Subject'])))
print("SUBJECT: "+ subject)
print("FROM: "+ email_from)
msg_encoding = 'iso-2022-jp'
if message.is_multipart() == False:
single = bytearray(message.get_payload(), msg_encoding)
body = single.decode(encoding = msg_encoding)
else:
multi = message.get_payload()[0]
body = multi.get_payload(decode=True).decode(encoding = msg_encoding)
body = re.sub('<[^<]+?>', '', body) # Remove special characters
print("Printing the body:" + body)
我正在做一个项目,我会收到带有特定 'subject' 的电子邮件。有网友转发给我。正文由文本组成,但在原始电子邮件中,并且没有在转发行上方输入新文本。电子邮件的任何一部分也有附件。
我使用 python 和 IMAP 编写了以下代码,并且只有当电子邮件是新的而不是转发的电子邮件时才能存储附件和正文。
def getAllEmails(username, password, subject, fromEmail, folderName):
mail = imaplib.IMAP4_SSL("imap.outlook.com")
mail.login(username, password)
print("Login success..........")
mail.select("inbox")
result, data = mail.search(None, 'SUBJECT', '"{}"'.format(subject))
inbox_item_list_subject = data[0].split()
result, data = mail.search(None, 'FROM', '"{}"'.format(fromEmail))
inbox_item_list_sender = data[0].split()
inbox_item_list = list(set(inbox_item_list_subject) & set(inbox_item_list_sender))
counter = 0
for item in inbox_item_list:
counter+=1
result2, email_data = mail.fetch(item,'(RFC822)')
raw_email = email_data[0][1].decode("utf-8")
email_message = email.message_from_string(raw_email)
#getting information about the mail like to, from,subject, date.
to_ = email_message['To']
from_ = email_message['From']
subject_ = email_message['Subject']
date_ = email_message['date']
# setting the format to save in text file.
to_ = "to: "
from_ = "from: " + from_ + str("\n")
date_ = "date: " + date_ + str("\n")
subject__ = "subject: " + subject_ + str("\n")
# accessing the subparts of email_message
for part in email_message.walk():
if part.get_content_maintype == 'multipart':
continue
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
filename = part.get_filename()
ext = mimetypes.guess_extension(part.get_content_type())
# allowing pdf, jpg, png and doc format only
if ext == '.pdf' or ext == '.csv' or ext == '.png' or ext == '.docx' or ext == '.xlsx':
if filename:
save_path = os.path.join(os.getcwd(), folderName, subject_)
if not os.path.exists(save_path):
os.makedirs(save_path)
with open(os.path.join(save_path, filename), 'wb') as fp:
fp.write(part.get_payload(decode=True))
fp.close()
# getting the body part of the mail.
try:
body = part.get_payload(decode=True).decode()
except:
pass
# saving the required information in a file named as "textfile.txt".
if content_type == "text/plain" and "attachment" not in content_disposition:
save_path = os.path.join(os.getcwd(), folderName, subject_)
if not os.path.exists(save_path):
os.makedirs(save_path)
filename = "textfile.txt"
with open(os.path.join(save_path, filename), 'w+', encoding='utf-8') as fp:
fp.writelines(to_)
fp.writelines(from_)
fp.writelines(date_)
fp.writelines(subject__)
fp.writelines(body)
fp.close()
mail.close()
mail.logout()
即使是转发电子邮件,我也希望存储正文和附件??
似乎您已经有了提取附件的部分。 尝试使用此代码检索多部分电子邮件的正文。
您可能需要弄清楚如何将您的部分与此部分合并。
def getAll(username, password, folderName):
mail = imaplib.IMAP4_SSL("imap.outlook.com")
mail.login(username, password)
print("Login success..........")
mail.select("INBOX")
result, data = mail.search(None, '(FROM "user@gmail.com" SUBJECT "Subject-Name")')
for num in data[0].split():
h, d = mail.fetch(num, '(RFC822)')
raw_email = d[0][1].decode("utf-8")
message = email.message_from_string(raw_email)
email_from = str(make_header(decode_header(message['From'])))
subject = str(make_header(decode_header(message['Subject'])))
print("SUBJECT: "+ subject)
print("FROM: "+ email_from)
msg_encoding = 'iso-2022-jp'
if message.is_multipart() == False:
single = bytearray(message.get_payload(), msg_encoding)
body = single.decode(encoding = msg_encoding)
else:
multi = message.get_payload()[0]
body = multi.get_payload(decode=True).decode(encoding = msg_encoding)
body = re.sub('<[^<]+?>', '', body) # Remove special characters
print("Printing the body:" + body)