Python - 从邮件中提取纯文本正文
Python - Extract the body from a mail in plain text
我只想提取邮件的正文并将其返回。
目前我可以过滤字段并显示摘要(snippet),但无法获取正文。
def GetMimeMessage(service, user_id, msg_id):
    """Fetch one message in raw form and parse it into a MIME message.

    Args:
        service: Gmail API service object; only
            ``service.users().messages().get(...).execute()`` is used.
        user_id: Value passed as ``userId`` to the API call.
        msg_id: Value passed as ``id`` to the API call.

    Returns:
        The message object built by the ``email`` parser from the decoded
        ``raw`` field, or ``None`` when the API call raised
        ``errors.HttpError`` (the error is printed, not re-raised).
    """
    try:
        message = service.users().messages().get(
            userId=user_id, id=msg_id, format='raw').execute()
        print('Message snippet: %s' % message['snippet'])
        # 'raw' holds base64url text; decoding it yields a byte string.
        msg_bytes = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))
        # message_from_string() only accepts text on Python 3, so use
        # message_from_bytes() where it exists and fall back to
        # message_from_string() on Python 2, where str is a byte string.
        parse = getattr(email, 'message_from_bytes', email.message_from_string)
        return parse(msg_bytes)
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
        return None
https://developers.google.com/gmail/api/v1/reference/users/messages/get
试试这个(`mail` 参数就是你的 `mime_msg` 变量):
def get_mpart(mail):
    """Return the payload of the first ``text/*`` part found in *mail*.

    The message itself is checked first, then every sub-part in
    depth-first order, so a text part nested inside another multipart
    container (for example ``multipart/alternative`` inside
    ``multipart/mixed``) is found as well.

    :param mail: Message object
    :return: ``get_payload()`` of the first part whose main content type
        is ``text`` (not transfer-decoded), or ``""`` when the message
        contains no text part at all.
    """
    # walk() yields the message itself and then all of its sub-parts, so
    # the single-part and multipart cases share one loop.
    for part in mail.walk():
        # This matches the mail body AND text file attachments.
        if part.get_content_maintype() == 'text':
            return part.get_payload()
    # No text at all. This also happens.
    return ""
def get_mail_body(mail):
    """
    Extract the body of a message; a mail has no 'body' tag of its own,
    hence this separate function.
    :param mail: Message object
    :return: Body content
    """
    if not mail.is_multipart():
        return mail.get_payload()
    # Concatenating the payload of every part does not work, so the
    # multipart case is delegated to get_mpart().
    return get_mpart(mail)
base64url 编码的字符串在传递到解码函数之前需要进行一些更改,如下所示:
# urlsafe_b64decode() already understands the URL-safe alphabet ('-' and
# '_'), so no character substitution is needed.  The one adjustment a
# base64url string may need is restoring stripped '=' padding, because the
# decoder rejects input whose length is not a multiple of four.
msg_str = base64.urlsafe_b64decode(
    (message['raw'] + '=' * (-len(message['raw']) % 4)).encode('ASCII'))
看看这是否有帮助
谢谢。经过一些修改,下面是最终的解决方案:
def GetMessageBody(service, user_id, msg_id):
    """Fetch one message in raw form and return its first text payload.

    Args:
        service: Gmail API service object; only
            ``service.users().messages().get(...).execute()`` is used.
        user_id: Value passed as ``userId`` to the API call.
        msg_id: Value passed as ``id`` to the API call.

    Returns:
        ``get_payload()`` of the first part whose main content type is
        ``text`` (the message itself is checked first, then its sub-parts
        depth-first; the payload is not transfer-decoded), ``""`` when the
        message has no text part, or ``None`` when the API call raised
        ``errors.HttpError`` (the error is printed, not re-raised).
    """
    try:
        message = service.users().messages().get(
            userId=user_id, id=msg_id, format='raw').execute()
        # 'raw' holds base64url text; decoding it yields a byte string.
        msg_bytes = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))
        # message_from_string() only accepts text on Python 3, so use
        # message_from_bytes() where it exists and fall back to
        # message_from_string() on Python 2, where str is a byte string.
        parse = getattr(email, 'message_from_bytes', email.message_from_string)
        mime_msg = parse(msg_bytes)
        # walk() also descends into nested multipart containers, which a
        # loop over the top-level payload alone would skip.
        for part in mime_msg.walk():
            if part.get_content_maintype() == 'text':
                return part.get_payload()
        return ""
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
        return None
我只想提取邮件的正文并将其返回。目前我可以过滤字段并显示摘要(snippet),但无法获取正文。
def GetMimeMessage(service, user_id, msg_id):
    """Fetch one message in raw form and parse it into a MIME message.

    Args:
        service: Gmail API service object; only
            ``service.users().messages().get(...).execute()`` is used.
        user_id: Value passed as ``userId`` to the API call.
        msg_id: Value passed as ``id`` to the API call.

    Returns:
        The message object built by the ``email`` parser from the decoded
        ``raw`` field, or ``None`` when the API call raised
        ``errors.HttpError`` (the error is printed, not re-raised).
    """
    try:
        message = service.users().messages().get(
            userId=user_id, id=msg_id, format='raw').execute()
        print('Message snippet: %s' % message['snippet'])
        # 'raw' holds base64url text; decoding it yields a byte string.
        msg_bytes = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))
        # message_from_string() only accepts text on Python 3, so use
        # message_from_bytes() where it exists and fall back to
        # message_from_string() on Python 2, where str is a byte string.
        parse = getattr(email, 'message_from_bytes', email.message_from_string)
        return parse(msg_bytes)
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
        return None
https://developers.google.com/gmail/api/v1/reference/users/messages/get
试试这个(`mail` 参数就是你的 `mime_msg` 变量):
def get_mpart(mail):
    """Return the payload of the first ``text/*`` part found in *mail*.

    The message itself is checked first, then every sub-part in
    depth-first order, so a text part nested inside another multipart
    container (for example ``multipart/alternative`` inside
    ``multipart/mixed``) is found as well.

    :param mail: Message object
    :return: ``get_payload()`` of the first part whose main content type
        is ``text`` (not transfer-decoded), or ``""`` when the message
        contains no text part at all.
    """
    # walk() yields the message itself and then all of its sub-parts, so
    # the single-part and multipart cases share one loop.
    for part in mail.walk():
        # This matches the mail body AND text file attachments.
        if part.get_content_maintype() == 'text':
            return part.get_payload()
    # No text at all. This also happens.
    return ""
def get_mail_body(mail):
    """
    Extract the body of a message; a mail has no 'body' tag of its own,
    hence this separate function.
    :param mail: Message object
    :return: Body content
    """
    if not mail.is_multipart():
        return mail.get_payload()
    # Concatenating the payload of every part does not work, so the
    # multipart case is delegated to get_mpart().
    return get_mpart(mail)
base64url 编码的字符串在传递到解码函数之前需要进行一些更改,如下所示:
# urlsafe_b64decode() already understands the URL-safe alphabet ('-' and
# '_'), so no character substitution is needed.  The one adjustment a
# base64url string may need is restoring stripped '=' padding, because the
# decoder rejects input whose length is not a multiple of four.
msg_str = base64.urlsafe_b64decode(
    (message['raw'] + '=' * (-len(message['raw']) % 4)).encode('ASCII'))
看看这是否有帮助
谢谢。经过一些修改,下面是最终的解决方案:
def GetMessageBody(service, user_id, msg_id):
    """Fetch one message in raw form and return its first text payload.

    Args:
        service: Gmail API service object; only
            ``service.users().messages().get(...).execute()`` is used.
        user_id: Value passed as ``userId`` to the API call.
        msg_id: Value passed as ``id`` to the API call.

    Returns:
        ``get_payload()`` of the first part whose main content type is
        ``text`` (the message itself is checked first, then its sub-parts
        depth-first; the payload is not transfer-decoded), ``""`` when the
        message has no text part, or ``None`` when the API call raised
        ``errors.HttpError`` (the error is printed, not re-raised).
    """
    try:
        message = service.users().messages().get(
            userId=user_id, id=msg_id, format='raw').execute()
        # 'raw' holds base64url text; decoding it yields a byte string.
        msg_bytes = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))
        # message_from_string() only accepts text on Python 3, so use
        # message_from_bytes() where it exists and fall back to
        # message_from_string() on Python 2, where str is a byte string.
        parse = getattr(email, 'message_from_bytes', email.message_from_string)
        mime_msg = parse(msg_bytes)
        # walk() also descends into nested multipart containers, which a
        # loop over the top-level payload alone would skip.
        for part in mime_msg.walk():
            if part.get_content_maintype() == 'text':
                return part.get_payload()
        return ""
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
        return None