在 Python 中从电子邮件里提取本身为 MSG 类型的附件

Extract Attachment which itself is of type MSG from an EMAIL in python

我需要用 Python 从电子邮件中提取 MSG 类型的附件,并将该 MSG 附件保存到某个位置。

我编写的脚本适用于几乎所有类型的文件,唯独不能处理 Outlook 邮件项(即本身是邮件的附件):

def parse_attachment(message_part):
    """Describe *message_part* as an attachment, or return None.

    A part counts as an attachment when it has a Content-Disposition
    header whose type token is ``attachment`` or ``inline``.

    Args:
        message_part: A message part exposing ``get``, ``get_payload`` and
            ``get_content_type`` (as ``email.message.Message`` does).

    Returns:
        A dict with the keys ``data`` (decoded payload), ``content_type``
        and ``size``, plus ``name``, ``creation-date`` and
        ``modification-date`` when the matching disposition parameters
        are present.  ``None`` when the part is not an attachment.

        ``size`` is the payload length as an int unless the header
        carries an explicit ``size`` parameter, in which case that
        parameter's string value is stored instead.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    # Strip the type token so that "attachment ; filename=..." (whitespace
    # before the semicolon) is still recognised.
    if dispositions[0].strip().lower() not in ("attachment", "inline"):
        return None

    file_data = message_part.get_payload(decode=True)
    debug(message_part)
    attachment = {
        'data': file_data,
        'content_type': message_part.get_content_type(),
        # get_payload(decode=True) yields None for multipart parts, so
        # guard the len() call instead of raising TypeError.
        'size': len(file_data) if file_data is not None else 0,
    }

    # Disposition parameter name -> key used in the returned dict.
    wanted = {
        "filename": 'name',
        "creation-date": 'creation-date',
        "modification-date": 'modification-date',
        "size": 'size',
    }
    for param in dispositions[1:]:
        # partition() splits on the first "=" only, so values that contain
        # "=" survive intact, and fragments without "=" (for example the
        # empty string left by a trailing ";") are skipped instead of
        # raising ValueError.
        name, separator, value = param.partition("=")
        if not separator:
            continue
        name = name.lower().strip()
        if name in wanted:
            attachment[wanted[name]] = value.strip().strip("\"")

    return attachment

我们必须单独处理作为附件的电子邮件。但是,walk() 是一个通用生成器,会按深度优先顺序遍历消息对象树的所有部分和子部分;如果使用它,附件邮件自身的内部各部分也会被一并解析。

因此,我们需要改用 get_payload() 逐个获取电子邮件的各个部分。下面是解析电子邮件附件的方法:

def get_subject(msgobj):
    """Return the decoded Subject header of *msgobj* with newlines removed.

    Each fragment produced by ``decode_header`` that carries a charset is
    re-encoded to UTF-8; fragments without a charset are used as they are.
    Returns None when the message has no Subject header.
    """
    raw_subject = msgobj['Subject']
    if raw_subject is None:
        return None

    pieces = []
    for fragment, charset in decode_header(raw_subject):
        if charset:
            # Normalise every charset-tagged fragment to UTF-8 bytes.
            fragment = unicode(fragment, charset).encode('utf8', 'replace')
        pieces.append(fragment)

    joined = ''.join(pieces)
    return re.sub('\n', '', joined)

def get_msg_file_as_attachment(message_part):
    """Build an attachment dict for a part that wraps a whole e-mail.

    The first item of the part's payload is treated as the embedded
    message.  It is serialised with its envelope ("From ") line and named
    after its subject with an ``.eml`` extension.

    Args:
        message_part: A part whose ``get_payload()`` returns a sequence
            with the embedded message as its first item.

    Returns:
        A dict with the keys ``data`` (the serialised message),
        ``content_type``, ``name`` and ``size``.
    """
    # Fetch the embedded message once instead of once per use.
    embedded = message_part.get_payload()[0]
    data = embedded.as_string(unixfrom=True)

    subject = get_subject(embedded)
    if subject is None:
        # An embedded message without a Subject header would otherwise
        # fail on None + '.eml'; fall back to a placeholder name.
        subject = 'untitled'

    return {
        'data': data,
        'content_type': message_part.get_content_type(),
        'name': subject + '.eml',
        'size': len(data),
    }

def parse_attachment(message_part):
    """Describe *message_part* as an attachment, or return None.

    A part counts as an attachment when it has a Content-Disposition
    header whose type token is ``attachment`` or ``inline``.  Parts of
    type ``message/rfc822`` (an e-mail attached to an e-mail) are handed
    to ``get_msg_file_as_attachment``; every other part is decoded
    directly.

    Args:
        message_part: A message part exposing ``get``, ``get_payload``,
            ``get_content_type`` and ``get_filename`` (as
            ``email.message.Message`` does).

    Returns:
        A dict with the keys ``data``, ``content_type``, ``size`` and
        ``name``, or ``None`` when the part is not an attachment.  For a
        multipart part ``data`` is None and ``size`` is 0.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    # Strip the type token so that "attachment ; filename=..." (whitespace
    # before the semicolon) is still recognised.
    disposition_type = content_disposition.strip().split(";")[0].strip().lower()
    if disposition_type not in ("attachment", "inline"):
        return None

    content_type = message_part.get_content_type()
    if content_type.lower().strip() == 'message/rfc822':
        return get_msg_file_as_attachment(message_part)

    file_data = message_part.get_payload(decode=True)
    return {
        'data': file_data,
        'content_type': content_type,
        # get_payload(decode=True) yields None for multipart parts, so
        # guard the len() call instead of raising TypeError.
        'size': len(file_data) if file_data is not None else 0,
        'name': message_part.get_filename(),
    }