使用 sendMail 函数从 url 数组中抓取价格
Price scraping from array of urls with sendMail function
我对抓取还很陌生,但主要思想很简单。
我想用一个网站上我感兴趣的产品制作一个 URL 的数组。
如果我想监控一个新产品,我会把新的 URL 放在数组中。
问题在这里:当我抓取价格时,它总是返回给我当前的价格,
但我该如何保存上一次的价格,以便比较现在是更便宜还是比上次更贵?
这是我目前对一项的测试解决方案:
import requests
from bs4 import BeautifulSoup
import smtplib
# Planned improvements:
#   - Accept a list of URLs instead of just one.
#   - Loop through all URLs in the list and get each name and price.
#   - Store the last price when a new item is added for monitoring.
#   - Assign a price to every single URL so the current price is known.

# Product page to monitor.
url = "https://www.example.com"

# Request headers carrying a browser-like User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.141 Safari/537.36"
    ),
}
def check_price():
    """Scrape the page at ``url`` and send an e-mail when the price is low.

    Downloads the module-level ``url`` with the module-level ``headers``,
    reads the text of the first element whose class is ``price``, prints
    the parsed price and calls ``send_mail()`` when it is below 80.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no ``price`` element or its text
            contains no number.
    """
    import re  # local import: only the price parsing needs it

    # A timeout keeps the script from hanging on an unresponsive host.
    page = requests.get(url, headers=headers, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    price_tag = soup.find(class_='price')
    if price_tag is None:
        raise ValueError(f"No element with class 'price' found at {url}")

    # The price text uses a decimal comma (e.g. "74,88"). Extract the whole
    # number instead of slicing the first four characters, which truncated
    # "74,88" to 74.8 and failed on a leading currency symbol.
    # NOTE(review): thousands separators (e.g. "1.234,56") are not handled.
    price_text = price_tag.get_text().strip().replace(",", ".")
    number = re.search(r"\d+(?:\.\d+)?", price_text)
    if number is None:
        raise ValueError(f"No numeric price found in {price_text!r}")

    converted_price = float(number.group())
    print(converted_price)
    if converted_price < 80:
        send_mail()
def send_mail():
    """Send the fixed price-change notification via smtp.gmail.com.

    Connects on port 587, upgrades the connection with STARTTLS, logs in
    and mails the message from the account to itself, then prints a
    confirmation.
    """
    subject = 'Price change scraper'
    body = 'The price of the following item has just been changed: https://www.example.com/example.html'
    old_price = 'Old price is: 74,88 with VAT'
    msg = f"Subject: {subject}\n\n{body}\n\n{old_price}"

    # The context manager issues QUIT and closes the socket even when the
    # login or the send raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.ehlo()
        server.starttls()
        server.ehlo()  # re-identify over the now-encrypted connection
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the environment or a config file.
        server.login('example@gmail.com', 'example123')
        server.sendmail(
            'example@gmail.com',
            'example@gmail.com',
            msg,
        )
    print('Email has been sent successfully!')
# Run the price check once, as soon as this module is loaded.
check_price()
将 url 和初始 price 存入一个字典列表中。按照您的方式循环,并将抓取到的 price 与初始 price 进行比较。
定义要抓取的数据:
# Items to scrape: each entry pairs a page URL with its initial price.
initData = [
    {"url": "https://www.example.com", "price": 100},
    {"url": "https://www.example1.com", "price": 200},
    {"url": "https://www.example2.com", "price": 300},
    {"url": "https://www.example3.com", "price": 400},
    {"url": "https://www.example4.com", "price": 500},
]
循环数据:
# Check every configured item in turn.
for item in initData:
    check_price(item)
抓取、比较并发送邮件:
def check_price(data):
    """Outline: compare the scraped price of one item with its initial price.

    Args:
        data: Dict with the keys 'url' and 'price'.
    """
    url = data['url']
    initPrice = data['price']
    # Placeholder: the scraping and parsing steps are elided here; they must
    # define ``converted_price`` before the comparison below.
    ...
    if converted_price < initPrice:
        send_mail()
例子
import requests
from bs4 import BeautifulSoup
import smtplib
# Planned improvements:
#   - Accept a list of URLs instead of just one.
#   - Loop through all URLs in the list and get each name and price.
#   - Store the last price when a new item is added for monitoring.
#   - Assign a price to every single URL so the current price is known.

# Items to scrape: each entry pairs a page URL with its initial price.
initData = [
    {"url": "https://www.example.com", "price": 100},
    {"url": "https://www.example1.com", "price": 200},
    {"url": "https://www.example2.com", "price": 300},
    {"url": "https://www.example3.com", "price": 400},
    {"url": "https://www.example4.com", "price": 500},
]

# Single-URL setting kept from the first version of the script.
url = "https://www.example.com"

# Request headers carrying a browser-like User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.141 Safari/537.36"
    ),
}
def check_price(data):
    """Scrape one monitored item and send an e-mail when its price dropped.

    Downloads ``data['url']`` with the module-level ``headers``, reads the
    text of the first element whose class is ``price`` and prints the
    parsed price.

    Args:
        data: Dict with the keys 'url' (page to scrape) and 'price' (the
            price to compare against). When the scraped price is lower,
            the key 'convertedPrice' is set to it and ``send_mail(data)``
            is called.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no ``price`` element or its text
            contains no number.
    """
    import re  # local import: only the price parsing needs it

    url = data['url']
    initPrice = data['price']

    # A timeout keeps the script from hanging on an unresponsive host.
    page = requests.get(url, headers=headers, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    price_tag = soup.find(class_='price')
    if price_tag is None:
        raise ValueError(f"No element with class 'price' found at {url}")

    # The price text uses a decimal comma (e.g. "74,88"). Extract the whole
    # number instead of slicing the first four characters, which truncated
    # "74,88" to 74.8 and failed on a leading currency symbol.
    # NOTE(review): thousands separators (e.g. "1.234,56") are not handled.
    price_text = price_tag.get_text().strip().replace(",", ".")
    number = re.search(r"\d+(?:\.\d+)?", price_text)
    if number is None:
        raise ValueError(f"No numeric price found in {price_text!r}")

    converted_price = float(number.group())
    print(converted_price)
    if converted_price < initPrice:
        data['convertedPrice'] = converted_price
        send_mail(data)
def send_mail(data):
    """Send a price-change notification for one item via smtp.gmail.com.

    Args:
        data: Dict describing the item. 'url' and 'price' are required;
            'convertedPrice' (the newly scraped price) is included in the
            message when present.

    Raises:
        KeyError: If 'url' or 'price' is missing from ``data``.
    """
    subject = 'Price change scraper'
    # Report the item that was actually passed in rather than a fixed
    # example URL and price.
    body = f"The price of the following item has just been changed: {data['url']}"
    old_price = f"Old price is: {data['price']} with VAT"
    msg = f"Subject: {subject}\n\n{body}\n\n{old_price}"
    new_price = data.get('convertedPrice')
    if new_price is not None:
        msg += f"\n\nNew price is: {new_price} with VAT"

    # The context manager issues QUIT and closes the socket even when the
    # login or the send raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.ehlo()
        server.starttls()
        server.ehlo()  # re-identify over the now-encrypted connection
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the environment or a config file.
        server.login('example@gmail.com', 'example123')
        server.sendmail(
            'example@gmail.com',
            'example@gmail.com',
            msg,
        )
    print('Email has been sent successfully!')
# Check every configured item in turn.
for item in initData:
    check_price(item)
我对抓取还很陌生,但主要思想很简单。 我想用一个网站上我感兴趣的产品制作一个 URL 的数组。
如果我想监控一个新产品,我会把新的 URL 放在数组中。
问题在这里:当我抓取价格时,它总是返回给我当前的价格, 但我该如何保存上一次的价格,以便比较现在是更便宜还是比上次更贵?
这是我目前对一项的测试解决方案:
import requests
from bs4 import BeautifulSoup
import smtplib
# Planned improvements:
#   - Accept a list of URLs instead of just one.
#   - Loop through all URLs in the list and get each name and price.
#   - Store the last price when a new item is added for monitoring.
#   - Assign a price to every single URL so the current price is known.

# Product page to monitor.
url = "https://www.example.com"

# Request headers carrying a browser-like User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.141 Safari/537.36"
    ),
}
def check_price():
    """Scrape the page at ``url`` and send an e-mail when the price is low.

    Downloads the module-level ``url`` with the module-level ``headers``,
    reads the text of the first element whose class is ``price``, prints
    the parsed price and calls ``send_mail()`` when it is below 80.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no ``price`` element or its text
            contains no number.
    """
    import re  # local import: only the price parsing needs it

    # A timeout keeps the script from hanging on an unresponsive host.
    page = requests.get(url, headers=headers, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    price_tag = soup.find(class_='price')
    if price_tag is None:
        raise ValueError(f"No element with class 'price' found at {url}")

    # The price text uses a decimal comma (e.g. "74,88"). Extract the whole
    # number instead of slicing the first four characters, which truncated
    # "74,88" to 74.8 and failed on a leading currency symbol.
    # NOTE(review): thousands separators (e.g. "1.234,56") are not handled.
    price_text = price_tag.get_text().strip().replace(",", ".")
    number = re.search(r"\d+(?:\.\d+)?", price_text)
    if number is None:
        raise ValueError(f"No numeric price found in {price_text!r}")

    converted_price = float(number.group())
    print(converted_price)
    if converted_price < 80:
        send_mail()
def send_mail():
    """Send the fixed price-change notification via smtp.gmail.com.

    Connects on port 587, upgrades the connection with STARTTLS, logs in
    and mails the message from the account to itself, then prints a
    confirmation.
    """
    subject = 'Price change scraper'
    body = 'The price of the following item has just been changed: https://www.example.com/example.html'
    old_price = 'Old price is: 74,88 with VAT'
    msg = f"Subject: {subject}\n\n{body}\n\n{old_price}"

    # The context manager issues QUIT and closes the socket even when the
    # login or the send raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.ehlo()
        server.starttls()
        server.ehlo()  # re-identify over the now-encrypted connection
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the environment or a config file.
        server.login('example@gmail.com', 'example123')
        server.sendmail(
            'example@gmail.com',
            'example@gmail.com',
            msg,
        )
    print('Email has been sent successfully!')
# Run the price check once, as soon as this module is loaded.
check_price()
将 url 和初始 price 存入一个字典列表中。按照您的方式循环,并将抓取到的 price 与初始 price 进行比较。
定义要抓取的数据:
# Items to scrape: each entry pairs a page URL with its initial price.
initData = [
    {"url": "https://www.example.com", "price": 100},
    {"url": "https://www.example1.com", "price": 200},
    {"url": "https://www.example2.com", "price": 300},
    {"url": "https://www.example3.com", "price": 400},
    {"url": "https://www.example4.com", "price": 500},
]
循环数据:
# Check every configured item in turn.
for item in initData:
    check_price(item)
抓取、比较并发送邮件:
def check_price(data):
    """Outline: compare the scraped price of one item with its initial price.

    Args:
        data: Dict with the keys 'url' and 'price'.
    """
    url = data['url']
    initPrice = data['price']
    # Placeholder: the scraping and parsing steps are elided here; they must
    # define ``converted_price`` before the comparison below.
    ...
    if converted_price < initPrice:
        send_mail()
例子
import requests
from bs4 import BeautifulSoup
import smtplib
# Planned improvements:
#   - Accept a list of URLs instead of just one.
#   - Loop through all URLs in the list and get each name and price.
#   - Store the last price when a new item is added for monitoring.
#   - Assign a price to every single URL so the current price is known.

# Items to scrape: each entry pairs a page URL with its initial price.
initData = [
    {"url": "https://www.example.com", "price": 100},
    {"url": "https://www.example1.com", "price": 200},
    {"url": "https://www.example2.com", "price": 300},
    {"url": "https://www.example3.com", "price": 400},
    {"url": "https://www.example4.com", "price": 500},
]

# Single-URL setting kept from the first version of the script.
url = "https://www.example.com"

# Request headers carrying a browser-like User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.141 Safari/537.36"
    ),
}
def check_price(data):
    """Scrape one monitored item and send an e-mail when its price dropped.

    Downloads ``data['url']`` with the module-level ``headers``, reads the
    text of the first element whose class is ``price`` and prints the
    parsed price.

    Args:
        data: Dict with the keys 'url' (page to scrape) and 'price' (the
            price to compare against). When the scraped price is lower,
            the key 'convertedPrice' is set to it and ``send_mail(data)``
            is called.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no ``price`` element or its text
            contains no number.
    """
    import re  # local import: only the price parsing needs it

    url = data['url']
    initPrice = data['price']

    # A timeout keeps the script from hanging on an unresponsive host.
    page = requests.get(url, headers=headers, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    price_tag = soup.find(class_='price')
    if price_tag is None:
        raise ValueError(f"No element with class 'price' found at {url}")

    # The price text uses a decimal comma (e.g. "74,88"). Extract the whole
    # number instead of slicing the first four characters, which truncated
    # "74,88" to 74.8 and failed on a leading currency symbol.
    # NOTE(review): thousands separators (e.g. "1.234,56") are not handled.
    price_text = price_tag.get_text().strip().replace(",", ".")
    number = re.search(r"\d+(?:\.\d+)?", price_text)
    if number is None:
        raise ValueError(f"No numeric price found in {price_text!r}")

    converted_price = float(number.group())
    print(converted_price)
    if converted_price < initPrice:
        data['convertedPrice'] = converted_price
        send_mail(data)
def send_mail(data):
    """Send a price-change notification for one item via smtp.gmail.com.

    Args:
        data: Dict describing the item. 'url' and 'price' are required;
            'convertedPrice' (the newly scraped price) is included in the
            message when present.

    Raises:
        KeyError: If 'url' or 'price' is missing from ``data``.
    """
    subject = 'Price change scraper'
    # Report the item that was actually passed in rather than a fixed
    # example URL and price.
    body = f"The price of the following item has just been changed: {data['url']}"
    old_price = f"Old price is: {data['price']} with VAT"
    msg = f"Subject: {subject}\n\n{body}\n\n{old_price}"
    new_price = data.get('convertedPrice')
    if new_price is not None:
        msg += f"\n\nNew price is: {new_price} with VAT"

    # The context manager issues QUIT and closes the socket even when the
    # login or the send raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.ehlo()
        server.starttls()
        server.ehlo()  # re-identify over the now-encrypted connection
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the environment or a config file.
        server.login('example@gmail.com', 'example123')
        server.sendmail(
            'example@gmail.com',
            'example@gmail.com',
            msg,
        )
    print('Email has been sent successfully!')
# Check every configured item in turn.
for item in initData:
    check_price(item)