如何使用 Python 点击邮件中的链接来下载数据?

How to use python to click on an email link to download data?

我正在尝试从我的邮箱中读取一封特定的电子邮件。我想单击其中的 'Click here' 超链接,把 excel 文件下载到我的笔记本电脑上。我正在尝试以下代码:

import smtplib
import time
import imaplib
import email
import traceback 

# Placeholder account settings; replace them with real credentials before running.
ORG_EMAIL = "@gmail.com"  # domain suffix appended to the mailbox name
FROM_EMAIL = f"myemail{ORG_EMAIL}"  # full login address
FROM_PWD = "password"
# NOTE: despite the "SMTP" prefix, this is the IMAP host (it is passed to imaplib).
SMTP_SERVER = "imap.gmail.com"
SMTP_PORT = 993  # standard IMAP-over-SSL port

def read_email_from_gmail():
    """Read the sender and subject of every inbox message, newest first.

    Logs in over IMAP/SSL using the module-level ``SMTP_SERVER``,
    ``SMTP_PORT``, ``FROM_EMAIL`` and ``FROM_PWD`` settings, fetches each
    message in full (``RFC822``) and parses it with the ``email`` package.

    Returns:
        A list of ``(sender, subject)`` header values, one pair per message,
        in descending message-id order. A missing header yields ``None``.
        The list is empty when the inbox is empty, and may be empty or
        partial when an error occurred.

    Exceptions are not propagated: the traceback and the error message are
    printed and whatever was collected so far is returned.
    """
    headers = []
    try:
        # The context manager logs out on exit, so the connection is not
        # leaked when a later step raises.
        with imaplib.IMAP4_SSL(SMTP_SERVER, SMTP_PORT) as mail:
            mail.login(FROM_EMAIL, FROM_PWD)
            mail.select('inbox')

            # search() returns (status, [b'1 2 3 ...']); splitting an empty
            # result gives an empty list, so an empty inbox is simply skipped.
            _, search_data = mail.search(None, 'ALL')
            id_list = search_data[0].split()

            # Iterate over the ids themselves so the oldest message is
            # included (a range() down to the first id would stop short).
            for msg_id in reversed(id_list):
                _, fetch_data = mail.fetch(msg_id, '(RFC822)')
                for response_part in fetch_data:
                    # The message arrives as an (envelope, raw_bytes) tuple;
                    # other items are protocol trailers such as b')'.
                    if not isinstance(response_part, tuple):
                        continue
                    # Parse the raw bytes directly; decoding them first with
                    # 'unicode_escape' corrupts non-ASCII content.
                    msg = email.message_from_bytes(response_part[1])
                    # Lookups take header *names*; filter on the returned
                    # values if only a specific sender/subject is wanted.
                    email_subject = msg['Subject']
                    email_from = msg['From']
                    headers.append((email_from, email_subject))
    except Exception as e:
        traceback.print_exc()
        print(str(e))
    return headers


read_email_from_gmail()

有人可以帮我看看,如何在我提取到的电子邮件中单击 'Click here to download data' 这个链接吗?

我强烈建议您获取电子邮件的 HTML,然后将其传递给 BeautifulSoup,这样您就可以使用选择器搜索元素。找到元素之后,您就可以拿到要找的链接(即 <a> 标签的 href 属性),然后向该链接发送请求以获取 excel 文件。

我正在使用 googleapiclient 库,但希望我的代码片段能给您一个想法。

import base64
import html

import requests
from bs4 import BeautifulSoup

# get_message_using_id() and message_id come from the caller's own
# googleapiclient code and are not defined in this snippet.
string_returned = get_message_using_id(message_id)['payload']['body']['data']
# The body is decoded as URL-safe base64 into the email's HTML text.
encoded_bytes = string_returned.encode('utf-8')
string_configured = base64.urlsafe_b64decode(encoded_bytes).decode('utf-8')
email_contents = html.unescape(string_configured)

soup = BeautifulSoup(email_contents, 'html.parser')
# The links list contains every <a> element in the email that has an href.
links = soup.select('a[href]')
if not links:
    raise IndexError("no <a href=...> link found in the email body")
# The URL lives in the href attribute; .text is only the visible label
# (e.g. "Click here") and cannot be requested.
link = links[0]['href']
# This will request the url of the link, then you can do anything with it.
response = requests.get(link)
# Fail loudly on an HTTP error instead of treating an error page as the data.
response.raise_for_status()

您可以使用 imap-tools 库:https://pypi.org/project/imap-tools/#id7

使用起来非常简单,简短明了:

pip install imap-tools

import re

import requests
from bs4 import BeautifulSoup
from imap_tools import MailBox
from imap_tools import AND, OR, NOT

# Collect the HTML bodies of the matching messages from the INBOX folder.
with MailBox('imap.gmail.com').login('email', 'pwd', 'INBOX') as mailbox:
    bodies = [
        msg.html
        for msg in mailbox.fetch(AND(subject='your email subject'), reverse=True)
    ]

# Join the bodies instead of calling str(bodies): the repr of a list escapes
# quotes and newlines, which corrupts the markup handed to the parser.
soup = BeautifulSoup('\n'.join(bodies), 'html.parser')

# Keep only the absolute https:// links.
links = [
    anchor.get('href')
    for anchor in soup.find_all('a', attrs={'href': re.compile(r"^https://")})
]

# Print every link with its 0-based index so the right one can be picked.
for index, href in enumerate(links):
    print(index, href)

# Index 5 (the sixth link) was the download link in the answerer's email;
# adjust the index for your own message.
result = links[5]
print(result)

resp = requests.get(result)
# Do not save an HTTP error page as if it were the data file.
resp.raise_for_status()

# The context manager closes the file even if the write fails.
with open('test.csv', 'wb') as output:
    output.write(resp.content)