使用 sendMail 函数从 url 数组中抓取价格

Price scraping from array of urls with sendMail function

我对抓取还很陌生,但主要思想很简单。 我想用一个网站上我感兴趣的产品制作一个 URL 的数组。

如果我想监控一个新产品,我会把新的 URL 放在数组中。

问题在这里:当我抓取价格时,它总是返回当前的价格,但我该如何保存上一次的价格,以便比较现在是比上次更便宜还是更贵?

这是我目前针对单个商品的测试方案:

import requests
from bs4 import BeautifulSoup
import smtplib

# Get a list of URLs instead of just one
# Loop through all URLs in the array and get the name and price
# Store the last price when inserting a new item for monitoring
# Assign a price to every single URL to know what the current price is


# Page to scrape and the browser-like User-Agent sent with the request.
url = 'https://www.example.com'
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.141 Safari/537.36'
    ),
}
def check_price():
    """Scrape the price from the module-level ``url`` and e-mail if it is low.

    Downloads the page using the module-level ``headers``, reads the text of
    the first element with class ``price``, converts it to a float (a decimal
    comma is accepted) and prints it. ``send_mail()`` is called when the
    price is below the hard-coded threshold of 80.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page has no ``price`` element or its text does
            not contain a number.
    """
    import re  # local import: only the price parsing below needs it

    # Without a timeout requests can wait forever on an unresponsive server.
    page = requests.get(url, headers=headers, timeout=10)
    # Do not parse an error page as if it were the product page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    price_tag = soup.find(class_='price')
    if price_tag is None:
        raise ValueError(f"no element with class 'price' found at {url}")

    # The site is assumed to use a decimal comma, e.g. "74,88".
    price_text = price_tag.get_text().strip().replace(",", ".")
    # Take the whole first number instead of the first four characters, which
    # truncated values such as "100.50" or "74.88". The first alternative
    # accepts whitespace-grouped thousands ("1 234.50"); a dot used as a
    # thousands separator ("1.234,50") is not handled.
    match = re.search(r"\d{1,3}(?:\s\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?", price_text)
    if match is None:
        raise ValueError(f"no price found in {price_text!r}")
    converted_price = float("".join(match.group().split()))

    print(converted_price)
    if converted_price < 80:
        send_mail()
    

def send_mail():
    """Send the fixed "price changed" notification through Gmail's SMTP server.

    Connects to ``smtp.gmail.com`` on port 587, upgrades the connection with
    STARTTLS, logs in and sends a plain-text message from and to
    ``example@gmail.com``. Prints a confirmation once the message has been
    handed to the server.

    Raises:
        smtplib.SMTPException: if login or sending fails.
    """
    subject = 'Price change scraper'
    body = 'The price of the following item has just been changed: https://www.example.com/example.html'
    old_price = 'Old price is: 74,88  with VAT'
    msg = f"Subject: {subject}\n\n{body}\n\n{old_price}"

    # The context manager issues QUIT and closes the socket even when login
    # or sending raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.ehlo()
        server.starttls()
        # EHLO again so the session continues over the encrypted channel.
        server.ehlo()

        # NOTE(review): credentials are hard-coded; load them from the
        # environment or a secrets store before using this for real.
        server.login('example@gmail.com', 'example123')

        server.sendmail(
            'example@gmail.com',
            'example@gmail.com',
            msg,
        )
        print('Email has been sent successfully!')


# Run the price check once when the script is executed.
check_price()

将 `url` 和初始 `price` 放入一个字典列表中。按照您的方式循环,并将抓取到的价格(scraped price)与初始价格(init price)进行比较。

定义要抓取的数据:

# One entry per monitored product: the page to scrape and its reference price.
initData = [
    {'url': 'https://www.example.com', 'price': 100},
    {'url': 'https://www.example1.com', 'price': 200},
    {'url': 'https://www.example2.com', 'price': 300},
    {'url': 'https://www.example3.com', 'price': 400},
    {'url': 'https://www.example4.com', 'price': 500},
]

循环数据:

# Check every monitored entry in turn.
for item in initData:
    check_price(item)

抓取、比较并发送邮件:

def check_price(data):
    """Sketch: compare the scraped price with the price stored for one item.

    ``data`` is a dict with the keys ``'url'`` and ``'price'``. The scraping
    code that produces ``converted_price`` is elided in this excerpt.
    """

    url = data['url']
    initPrice = data['price']
    # Placeholder for the scraping step that sets ``converted_price``.
    ...
    # Notify only when the scraped price is below the stored price.
    if(converted_price < initPrice):
        send_mail()

例子

import requests
from bs4 import BeautifulSoup
import smtplib

# Get a list of URLs instead of just one
# Loop through all URLs in the array and get the name and price
# Store the last price when inserting a new item for monitoring
# Assign a price to every single URL to know what the current price is


# One entry per monitored product: the page to scrape and its reference price.
initData = [
    {'url': 'https://www.example.com', 'price': 100},
    {'url': 'https://www.example1.com', 'price': 200},
    {'url': 'https://www.example2.com', 'price': 300},
    {'url': 'https://www.example3.com', 'price': 400},
    {'url': 'https://www.example4.com', 'price': 500},
]


# NOTE(review): check_price(data) below takes its URL from each item, so this
# module-level ``url`` looks like a leftover from the single-URL version.
url = 'https://www.example.com'
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.141 Safari/537.36'
    ),
}


def check_price(data):
    """Scrape the current price of one monitored item and e-mail if it dropped.

    Args:
        data: dict with the keys ``'url'`` (page to scrape) and ``'price'``
            (reference price). When the scraped price is lower than the
            reference, the key ``'convertedPrice'`` is set on ``data`` and
            ``send_mail(data)`` is called.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page has no ``price`` element or its text does
            not contain a number.
    """
    import re  # local import: only the price parsing below needs it

    url = data['url']
    initPrice = data['price']

    # Without a timeout requests can wait forever on an unresponsive server.
    page = requests.get(url, headers=headers, timeout=10)
    # Do not parse an error page as if it were the product page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    price_tag = soup.find(class_='price')
    if price_tag is None:
        raise ValueError(f"no element with class 'price' found at {url}")

    # The site is assumed to use a decimal comma, e.g. "74,88".
    price_text = price_tag.get_text().strip().replace(",", ".")
    # Take the whole first number instead of the first four characters, which
    # truncated values such as "100.50" or "74.88". The first alternative
    # accepts whitespace-grouped thousands ("1 234.50"); a dot used as a
    # thousands separator ("1.234,50") is not handled.
    match = re.search(r"\d{1,3}(?:\s\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?", price_text)
    if match is None:
        raise ValueError(f"no price found in {price_text!r}")
    converted_price = float("".join(match.group().split()))

    print(converted_price)
    if converted_price < initPrice:
        # Hand the new price to send_mail() together with the item.
        data['convertedPrice'] = converted_price
        send_mail(data)

def send_mail(data):
    """E-mail a price-change notification for one monitored item.

    Args:
        data: dict with the keys ``'url'`` and ``'price'`` (the reference
            price) and, optionally, ``'convertedPrice'`` (the newly scraped
            price). These values are put into the message so that each
            item gets its own notification text.

    Raises:
        smtplib.SMTPException: if login or sending fails.
    """
    subject = 'Price change scraper'
    # Build the text from the item itself; the previous fixed URL and price
    # made every notification identical regardless of ``data``.
    parts = [
        f"The price of the following item has just been changed: {data['url']}",
        f"Old price is: {data['price']}",
    ]
    new_price = data.get('convertedPrice')
    if new_price is not None:
        parts.append(f"New price is: {new_price}")
    msg = f"Subject: {subject}\n\n" + "\n\n".join(parts)

    # The context manager issues QUIT and closes the socket even when login
    # or sending raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.ehlo()
        server.starttls()
        # EHLO again so the session continues over the encrypted channel.
        server.ehlo()

        # NOTE(review): credentials are hard-coded; load them from the
        # environment or a secrets store before using this for real.
        server.login('example@gmail.com', 'example123')

        server.sendmail(
            'example@gmail.com',
            'example@gmail.com',
            msg,
        )
        print('Email has been sent successfully!')

# Check every monitored entry of initData in turn.
for item in initData:
    check_price(item)