如何使用python点击邮件link下载数据?
How to use python to click on an email link to download data?
我正在尝试从我的邮箱中读取一封特定的电子邮件。我想单击邮件中的 'Click here' 超链接,以便在我的笔记本电脑上开始下载 Excel 文件。我正在尝试以下代码:
import smtplib
import time
import imaplib
import email
import traceback
# Domain suffix appended to the account name to build the login address.
ORG_EMAIL = "@gmail.com"
# Full address used to log in ("myemail" is a placeholder account name).
FROM_EMAIL = "myemail" + ORG_EMAIL
# NOTE(review): placeholder credential hard-coded in source — presumably a
# real script should read this from the environment or a secret store; confirm.
FROM_PWD = "password"
# NOTE: despite the SMTP_ prefix this is an IMAP host; it is the value handed
# to imaplib.IMAP4_SSL in read_email_from_gmail().
SMTP_SERVER = "imap.gmail.com"
# Port for the server above (993 is the standard IMAP-over-SSL port).
SMTP_PORT = 993
def read_email_from_gmail():
    """Read the sender and subject of every message in the Gmail inbox.

    Logs in over IMAP-over-SSL with the module-level credentials, walks the
    inbox from the newest message to the oldest, and parses each message.

    Returns:
        A list of ``(from_header, subject_header)`` tuples, newest first.
        A header that is missing from a message is reported as ``None``.
        The list is empty when the inbox is empty, and holds whatever was
        collected so far when an error interrupted the scan.

    Errors are reported on a best-effort basis: the traceback and the
    exception message are printed and the function still returns.
    """
    headers = []
    try:
        # The context manager logs out of the server even when an error
        # occurs, so the connection is never leaked.
        with imaplib.IMAP4_SSL(SMTP_SERVER, SMTP_PORT) as mail:
            mail.login(FROM_EMAIL, FROM_PWD)
            mail.select('inbox')

            _, mail_ids = mail.search(None, 'ALL')
            # An empty mailbox yields [b''] (or [None]); treat both as "no ids"
            # instead of failing on id_list[0].
            id_list = mail_ids[0].split() if mail_ids and mail_ids[0] else []

            # Iterate the ids themselves, newest first. The previous
            # range(latest, first, -1) silently skipped the oldest message.
            for msg_id in reversed(id_list):
                _, fetched = mail.fetch(msg_id.decode(), '(RFC822)')
                for response_part in fetched:
                    # Message data arrives as (envelope, raw_bytes) tuples;
                    # the remaining items are protocol trailers such as b')'.
                    if not isinstance(response_part, tuple):
                        continue
                    # Parse the raw bytes directly: decoding them with
                    # 'unicode_escape' corrupts non-ASCII content.
                    msg = email.message_from_bytes(response_part[1])
                    # Header lookup is by header *name* ('from', 'subject'),
                    # not by the value one hopes to find in it.
                    headers.append((msg['from'], msg['subject']))
    except Exception as e:
        traceback.print_exc()
        print(str(e))
    return headers
# Run the inbox scan when this snippet is executed.
read_email_from_gmail()
有人可以帮助我如何从我正在提取的电子邮件中单击 link 'Click here to download data' 吗?
我强烈建议您先获取电子邮件的 HTML,然后将其传递给 BeautifulSoup,这样您就可以使用选择器搜索元素。找到目标元素后,您就可以取得所需链接的 URL(即 href 属性),然后向该 URL 发送请求以获取 Excel 文件。
我正在使用 googleapiclient 库,但希望我的代码片段能给您一个想法。
import base64
import html

import requests
from bs4 import BeautifulSoup

# `get_message_using_id` and `message_id` come from the author's own
# googleapiclient wrapper and are not shown here. The body payload is
# decoded below as URL-safe base64 text.
string_returned = get_message_using_id(message_id)['payload']['body']['data']
# urlsafe_b64decode accepts an ASCII str directly, so no bytes() round-trip
# is needed before decoding.
string_configured = base64.urlsafe_b64decode(string_returned).decode('utf-8')
email_contents = html.unescape(string_configured)

soup = BeautifulSoup(email_contents, 'html.parser')
# All anchors in the email that actually carry a URL.
links = soup.select('a[href]')
if not links:
    raise ValueError('no links found in the email body')

# The URL lives in the href attribute. `.text` is only the visible label
# (e.g. "Click here"), which is not something requests can fetch.
link = links[0]['href']

# Request the URL; response.content then holds the downloaded bytes.
response = requests.get(link)
# Fail loudly on an HTTP error instead of treating an error page as data.
response.raise_for_status()
您可以使用 imap-tools 库:
https://pypi.org/project/imap-tools/#id7
它使用起来非常简单,代码简短明了:
pip install imap-tools
import re

import requests
from bs4 import BeautifulSoup
from imap_tools import AND, NOT, OR, MailBox  # OR / NOT: for richer search criteria

# Position of the download link among the https links found in the matching
# emails (counted from 0). It depends on the email layout: run once, read the
# printed list, then adjust.
LINK_INDEX = 5

# Collect the HTML bodies of the matching messages in the INBOX folder.
with MailBox('imap.gmail.com').login('email', 'pwd', 'INBOX') as mailbox:
    bodies = [
        msg.html
        for msg in mailbox.fetch(AND(subject='your email subject'), reverse=True)
    ]

# Parse the HTML itself. str(bodies) would parse the list's repr, which adds
# brackets, quotes and escaped newlines. Naming the parser keeps the result
# independent of which parsers happen to be installed.
soup = BeautifulSoup(''.join(bodies), 'html.parser')

# Keep only absolute https links.
links = [
    anchor.get('href')
    for anchor in soup.find_all('a', attrs={'href': re.compile(r'^https://')})
]

# Show every candidate with its index so the right one can be picked; a bare
# `links` expression only displays something in a REPL or notebook.
for index, url in enumerate(links):
    print(index, url)

if len(links) <= LINK_INDEX:
    raise IndexError(
        f'expected at least {LINK_INDEX + 1} https links, found {len(links)}'
    )
result = links[LINK_INDEX]
print(result)

resp = requests.get(result)
# Do not save an HTTP error page as if it were the data file.
resp.raise_for_status()
# The with-block closes the file even if the write fails.
with open('test.csv', 'wb') as output:
    output.write(resp.content)
我正在尝试从我的邮箱中读取一封特定的电子邮件。我想单击邮件中的 'Click here' 超链接,以便在我的笔记本电脑上开始下载 Excel 文件。我正在尝试以下代码:
import smtplib
import time
import imaplib
import email
import traceback
# Domain suffix appended to the account name to build the login address.
ORG_EMAIL = "@gmail.com"
# Full address used to log in ("myemail" is a placeholder account name).
FROM_EMAIL = "myemail" + ORG_EMAIL
# NOTE(review): placeholder credential hard-coded in source — presumably a
# real script should read this from the environment or a secret store; confirm.
FROM_PWD = "password"
# NOTE: despite the SMTP_ prefix this is an IMAP host; it is the value handed
# to imaplib.IMAP4_SSL in read_email_from_gmail().
SMTP_SERVER = "imap.gmail.com"
# Port for the server above (993 is the standard IMAP-over-SSL port).
SMTP_PORT = 993
def read_email_from_gmail():
    """Read the sender and subject of every message in the Gmail inbox.

    Logs in over IMAP-over-SSL with the module-level credentials, walks the
    inbox from the newest message to the oldest, and parses each message.

    Returns:
        A list of ``(from_header, subject_header)`` tuples, newest first.
        A header that is missing from a message is reported as ``None``.
        The list is empty when the inbox is empty, and holds whatever was
        collected so far when an error interrupted the scan.

    Errors are reported on a best-effort basis: the traceback and the
    exception message are printed and the function still returns.
    """
    headers = []
    try:
        # The context manager logs out of the server even when an error
        # occurs, so the connection is never leaked.
        with imaplib.IMAP4_SSL(SMTP_SERVER, SMTP_PORT) as mail:
            mail.login(FROM_EMAIL, FROM_PWD)
            mail.select('inbox')

            _, mail_ids = mail.search(None, 'ALL')
            # An empty mailbox yields [b''] (or [None]); treat both as "no ids"
            # instead of failing on id_list[0].
            id_list = mail_ids[0].split() if mail_ids and mail_ids[0] else []

            # Iterate the ids themselves, newest first. The previous
            # range(latest, first, -1) silently skipped the oldest message.
            for msg_id in reversed(id_list):
                _, fetched = mail.fetch(msg_id.decode(), '(RFC822)')
                for response_part in fetched:
                    # Message data arrives as (envelope, raw_bytes) tuples;
                    # the remaining items are protocol trailers such as b')'.
                    if not isinstance(response_part, tuple):
                        continue
                    # Parse the raw bytes directly: decoding them with
                    # 'unicode_escape' corrupts non-ASCII content.
                    msg = email.message_from_bytes(response_part[1])
                    # Header lookup is by header *name* ('from', 'subject'),
                    # not by the value one hopes to find in it.
                    headers.append((msg['from'], msg['subject']))
    except Exception as e:
        traceback.print_exc()
        print(str(e))
    return headers
# Run the inbox scan when this snippet is executed.
read_email_from_gmail()
有人可以帮助我如何从我正在提取的电子邮件中单击 link 'Click here to download data' 吗?
我强烈建议您先获取电子邮件的 HTML,然后将其传递给 BeautifulSoup,这样您就可以使用选择器搜索元素。找到目标元素后,您就可以取得所需链接的 URL(即 href 属性),然后向该 URL 发送请求以获取 Excel 文件。
我正在使用 googleapiclient 库,但希望我的代码片段能给您一个想法。
import base64
import html

import requests
from bs4 import BeautifulSoup

# `get_message_using_id` and `message_id` come from the author's own
# googleapiclient wrapper and are not shown here. The body payload is
# decoded below as URL-safe base64 text.
string_returned = get_message_using_id(message_id)['payload']['body']['data']
# urlsafe_b64decode accepts an ASCII str directly, so no bytes() round-trip
# is needed before decoding.
string_configured = base64.urlsafe_b64decode(string_returned).decode('utf-8')
email_contents = html.unescape(string_configured)

soup = BeautifulSoup(email_contents, 'html.parser')
# All anchors in the email that actually carry a URL.
links = soup.select('a[href]')
if not links:
    raise ValueError('no links found in the email body')

# The URL lives in the href attribute. `.text` is only the visible label
# (e.g. "Click here"), which is not something requests can fetch.
link = links[0]['href']

# Request the URL; response.content then holds the downloaded bytes.
response = requests.get(link)
# Fail loudly on an HTTP error instead of treating an error page as data.
response.raise_for_status()
您可以使用 imap-tools 库:https://pypi.org/project/imap-tools/#id7
它使用起来非常简单,代码简短明了:
pip install imap-tools
import re

import requests
from bs4 import BeautifulSoup
from imap_tools import AND, NOT, OR, MailBox  # OR / NOT: for richer search criteria

# Position of the download link among the https links found in the matching
# emails (counted from 0). It depends on the email layout: run once, read the
# printed list, then adjust.
LINK_INDEX = 5

# Collect the HTML bodies of the matching messages in the INBOX folder.
with MailBox('imap.gmail.com').login('email', 'pwd', 'INBOX') as mailbox:
    bodies = [
        msg.html
        for msg in mailbox.fetch(AND(subject='your email subject'), reverse=True)
    ]

# Parse the HTML itself. str(bodies) would parse the list's repr, which adds
# brackets, quotes and escaped newlines. Naming the parser keeps the result
# independent of which parsers happen to be installed.
soup = BeautifulSoup(''.join(bodies), 'html.parser')

# Keep only absolute https links.
links = [
    anchor.get('href')
    for anchor in soup.find_all('a', attrs={'href': re.compile(r'^https://')})
]

# Show every candidate with its index so the right one can be picked; a bare
# `links` expression only displays something in a REPL or notebook.
for index, url in enumerate(links):
    print(index, url)

if len(links) <= LINK_INDEX:
    raise IndexError(
        f'expected at least {LINK_INDEX + 1} https links, found {len(links)}'
    )
result = links[LINK_INDEX]
print(result)

resp = requests.get(result)
# Do not save an HTTP error page as if it were the data file.
resp.raise_for_status()
# The with-block closes the file even if the write fails.
with open('test.csv', 'wb') as output:
    output.write(resp.content)