如何检查新折扣,并在检测到变化时发送到 Telegram?
How to check for new discounts and send to telegram if changes detected?
我想从某个网站获取新的折扣信息,并在网站内容发生变化时通过 Telegram 给我发消息。
目前的脚本可以工作,但我收到的消息太多了,所以我想修改脚本,只检查网站上某个特定的 class。
具体来说,我想检查网站上的 <span class="space--ml-1 size--all-l size--fromW3-xl cept-discount">-49%</span>
只有当该值介于 -65% 和 -99% 之间时,我才需要收到消息。这可能吗?用于检查变化的脚本如下:
import requests
from bs4 import BeautifulSoup
import difflib
import time
from datetime import datetime
import re
import os
import schedule
import cloudscraper
# Target URL; it is only used in the log output and in the Telegram message text.
# NOTE(review): the monitoring loop downloads "https://nl.pepper.com/nieuw",
# not this URL — confirm which page is meant to be monitored.
url = "https://nl.pepper.com/groep/prijsfout"
# Browser-like User-Agent headers, kept for reference but currently disabled.
#headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
# cloudscraper session that the monitoring loop uses to download the page.
scraper = cloudscraper.create_scraper()
# Send a message via a telegram bot
def telegram_bot_sendtext(bot_message):
    """Send ``bot_message`` to the configured Telegram chat via the Bot API.

    The message is passed through ``params`` so that ``requests`` URL-encodes
    it. Concatenating it into the query string by hand breaks as soon as the
    text contains newlines, ``&``, ``#`` or ``+`` (all common in diff output).

    Args:
        bot_message: Text of the message to send.

    Returns:
        The decoded JSON body of the Telegram API response.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from the
    # environment instead of keeping them in the source file.
    bot_token = '17XXXX32:AAFd5jXXXXXXXXXXXXC5UJgG5pses8'
    bot_chatID = '-XXXXX'
    api_url = 'https://api.telegram.org/bot' + bot_token + '/sendMessage'
    # NOTE(review): with parse_mode=Markdown, characters such as '*' or '_' in
    # the text are interpreted as markup — confirm this is wanted for diffs.
    payload = {
        'chat_id': bot_chatID,
        'parse_mode': 'Markdown',
        'text': bot_message,
    }
    response = requests.get(api_url, params=payload)
    return response.json()
# Monitoring state: text of the previously seen page, and a flag marking the
# very first pass, when there is nothing to compare against yet.
PrevVersion = ""
FirstRun = True
while True:
    # download the page
    # NOTE(review): this fetches /nieuw while `url` (used in the messages
    # below) may point to a different page — confirm which one is intended.
    response = scraper.get("https://nl.pepper.com/nieuw").content
    # parse the downloaded homepage
    soup = BeautifulSoup(response, 'html.parser')
    # remove all scripts and styles so only the visible text is compared
    for script in soup(["script", "style"]):
        script.extract()
    # keep the text in its own variable so `soup` stays a BeautifulSoup tree
    page_text = soup.get_text()
    # compare the page text to the previous version
    if PrevVersion != page_text:
        if FirstRun:
            # on the first run - just memorize the page
            FirstRun = False
            print("Start Monitoring " + url + " " + str(datetime.now()))
        else:
            print("Changes detected at: " + str(datetime.now()))
            OldPage = PrevVersion.splitlines()
            NewPage = page_text.splitlines()
            # compare versions and highlight changes using difflib
            diff = difflib.context_diff(OldPage, NewPage, n=0)
            out_text = "\n".join([ll.rstrip() for ll in '\n'.join(diff).splitlines() if ll.strip()])
            print(out_text)
            # Send the message via the telegram bot; the newline keeps the
            # URL from running into the first line of the diff.
            telegram_bot_sendtext("Nieuwe prijsfout op Pepper " + url + "\n" + out_text)
        # the current text becomes the baseline for the next comparison
        PrevVersion = page_text
    else:
        print("No Changes " + str(datetime.now()))
    # wait before polling the page again
    time.sleep(5)
另外,这个脚本中的 cookie 可能也有问题(或者根本没有定义)。
要判断是否存在介于 -65% 和 -99% 之间的折扣,一个简单的解决方案如下所示。
该函数接收您的 soup 对象并查找页面上的所有折扣:如果有任何折扣落在您指定的范围内则返回 True,否则返回 False:
def get_discounts(soup, low=65, high=99):
    """Report whether the page lists at least one discount inside a range.

    Every ``.cept-discount`` element is inspected; the function only answers
    ``False`` after all of them have been checked.

    Args:
        soup: Parsed page. It is queried with ``select``, so it must still be
            the BeautifulSoup tree, not the string returned by ``get_text()``.
        low: Smallest discount percentage that counts as a hit (inclusive).
        high: Largest discount percentage that counts as a hit (inclusive).

    Returns:
        True if any discount badge holds a percentage between ``low`` and
        ``high``; False otherwise, including when the page has no badges.
    """
    for d in soup.select('.cept-discount'):
        # Keep only the digits, so "-49%" becomes "49".
        digits = ''.join(filter(str.isdigit, d.text))
        # Skip empty badges and labels without digits: int('') would raise.
        if digits and low <= int(digits) <= high:
            return True
    return False
get_discounts(soup)
注意:必须在执行 soup = soup.get_text() 之前调用该函数——顺序很重要,因为这一行会把 soup 的内容替换为纯文本。
更好的做法是把文本存到另一个变量中(例如 souptext),这样可以确保 soup 始终是 BeautifulSoup 对象,即以嵌套数据结构表示的文档。
最终的脚本大致如下:
import requests, time, difflib, os, re, schedule, cloudscraper
from bs4 import BeautifulSoup
from datetime import datetime
# Target URL; it is only used in the log output and in the Telegram message text.
# NOTE(review): the monitoring loop downloads "https://nl.pepper.com/nieuw",
# not this URL — confirm which page is meant to be monitored.
url = "https://nl.pepper.com/groep/prijsfout"
# Browser-like User-Agent headers, kept for reference but currently disabled.
#headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
# cloudscraper session that the monitoring loop uses to download the page.
scraper = cloudscraper.create_scraper()
# Send a message via a telegram bot
def telegram_bot_sendtext(bot_message):
    """Send ``bot_message`` to the configured Telegram chat via the Bot API.

    The message is passed through ``params`` so that ``requests`` URL-encodes
    it. Concatenating it into the query string by hand breaks as soon as the
    text contains newlines, ``&``, ``#`` or ``+`` (all common in diff output).

    Args:
        bot_message: Text of the message to send.

    Returns:
        The decoded JSON body of the Telegram API response.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from the
    # environment instead of keeping them in the source file.
    bot_token = '17XXXX32:AAFd5jXXXXXXXXXXXXC5UJgG5pses8'
    bot_chatID = '-XXXXX'
    api_url = 'https://api.telegram.org/bot' + bot_token + '/sendMessage'
    # NOTE(review): with parse_mode=Markdown, characters such as '*' or '_' in
    # the text are interpreted as markup — confirm this is wanted for diffs.
    payload = {
        'chat_id': bot_chatID,
        'parse_mode': 'Markdown',
        'text': bot_message,
    }
    response = requests.get(api_url, params=payload)
    return response.json()
# Monitoring state: text of the previously seen page, and a flag marking the
# very first pass, when there is nothing to compare against yet.
PrevVersion = ""
FirstRun = True
while True:
    # download the page
    # NOTE(review): this fetches /nieuw while `url` (used in the messages
    # below) may point to a different page — confirm which one is intended.
    response = scraper.get("https://nl.pepper.com/nieuw").content
    # parse the downloaded homepage
    soup = BeautifulSoup(response, 'html.parser')
    # remove all scripts and styles so only the visible text is compared
    for script in soup(["script", "style"]):
        script.extract()
    # get_discounts() must be defined before this loop, and it has to run
    # while `soup` is still the parsed tree, i.e. before get_text().
    discounts = get_discounts(soup)
    # keep the text in its own variable so `soup` stays a BeautifulSoup tree
    page_text = soup.get_text()
    if FirstRun:
        # Take the baseline on the very first pass whether or not a discount
        # is listed. If this were gated on `discounts`, the first qualifying
        # discount to appear later would be memorized instead of reported.
        PrevVersion = page_text
        FirstRun = False
        print("Start Monitoring " + url + " " + str(datetime.now()))
    elif PrevVersion != page_text and discounts:
        # the page changed and at least one discount is in the wanted range
        print("Changes detected at: " + str(datetime.now()))
        OldPage = PrevVersion.splitlines()
        NewPage = page_text.splitlines()
        # compare versions and highlight changes using difflib
        diff = difflib.context_diff(OldPage, NewPage, n=0)
        out_text = "\n".join([ll.rstrip() for ll in '\n'.join(diff).splitlines() if ll.strip()])
        print(out_text)
        # Send the message via the telegram bot; the newline keeps the URL
        # from running into the first line of the diff.
        telegram_bot_sendtext("Nieuwe prijsfout op Pepper " + url + "\n" + out_text)
        # the current text becomes the baseline for the next comparison
        PrevVersion = page_text
    else:
        print("No Changes " + str(datetime.now()))
    # wait before polling the page again
    time.sleep(5)
我想从某个网站获取新的折扣信息,并在网站内容发生变化时通过 Telegram 给我发消息。
目前的脚本可以工作,但我收到的消息太多了,所以我想修改脚本,只检查网站上某个特定的 class。
具体来说,我想检查网站上的 <span class="space--ml-1 size--all-l size--fromW3-xl cept-discount">-49%</span>
只有当该值介于 -65% 和 -99% 之间时,我才需要收到消息。这可能吗?用于检查变化的脚本如下:
import requests
from bs4 import BeautifulSoup
import difflib
import time
from datetime import datetime
import re
import os
import schedule
import cloudscraper
# Target URL; it is only used in the log output and in the Telegram message text.
# NOTE(review): the monitoring loop downloads "https://nl.pepper.com/nieuw",
# not this URL — confirm which page is meant to be monitored.
url = "https://nl.pepper.com/groep/prijsfout"
# Browser-like User-Agent headers, kept for reference but currently disabled.
#headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
# cloudscraper session that the monitoring loop uses to download the page.
scraper = cloudscraper.create_scraper()
# Send a message via a telegram bot
def telegram_bot_sendtext(bot_message):
    """Send ``bot_message`` to the configured Telegram chat via the Bot API.

    The message is passed through ``params`` so that ``requests`` URL-encodes
    it. Concatenating it into the query string by hand breaks as soon as the
    text contains newlines, ``&``, ``#`` or ``+`` (all common in diff output).

    Args:
        bot_message: Text of the message to send.

    Returns:
        The decoded JSON body of the Telegram API response.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from the
    # environment instead of keeping them in the source file.
    bot_token = '17XXXX32:AAFd5jXXXXXXXXXXXXC5UJgG5pses8'
    bot_chatID = '-XXXXX'
    api_url = 'https://api.telegram.org/bot' + bot_token + '/sendMessage'
    # NOTE(review): with parse_mode=Markdown, characters such as '*' or '_' in
    # the text are interpreted as markup — confirm this is wanted for diffs.
    payload = {
        'chat_id': bot_chatID,
        'parse_mode': 'Markdown',
        'text': bot_message,
    }
    response = requests.get(api_url, params=payload)
    return response.json()
# Monitoring state: text of the previously seen page, and a flag marking the
# very first pass, when there is nothing to compare against yet.
PrevVersion = ""
FirstRun = True
while True:
    # download the page
    # NOTE(review): this fetches /nieuw while `url` (used in the messages
    # below) may point to a different page — confirm which one is intended.
    response = scraper.get("https://nl.pepper.com/nieuw").content
    # parse the downloaded homepage
    soup = BeautifulSoup(response, 'html.parser')
    # remove all scripts and styles so only the visible text is compared
    for script in soup(["script", "style"]):
        script.extract()
    # keep the text in its own variable so `soup` stays a BeautifulSoup tree
    page_text = soup.get_text()
    # compare the page text to the previous version
    if PrevVersion != page_text:
        if FirstRun:
            # on the first run - just memorize the page
            FirstRun = False
            print("Start Monitoring " + url + " " + str(datetime.now()))
        else:
            print("Changes detected at: " + str(datetime.now()))
            OldPage = PrevVersion.splitlines()
            NewPage = page_text.splitlines()
            # compare versions and highlight changes using difflib
            diff = difflib.context_diff(OldPage, NewPage, n=0)
            out_text = "\n".join([ll.rstrip() for ll in '\n'.join(diff).splitlines() if ll.strip()])
            print(out_text)
            # Send the message via the telegram bot; the newline keeps the
            # URL from running into the first line of the diff.
            telegram_bot_sendtext("Nieuwe prijsfout op Pepper " + url + "\n" + out_text)
        # the current text becomes the baseline for the next comparison
        PrevVersion = page_text
    else:
        print("No Changes " + str(datetime.now()))
    # wait before polling the page again
    time.sleep(5)
另外,这个脚本中的 cookie 可能也有问题(或者根本没有定义)。
要判断是否存在介于 -65% 和 -99% 之间的折扣,一个简单的解决方案如下所示。
该函数接收您的 soup 对象并查找页面上的所有折扣:如果有任何折扣落在您指定的范围内则返回 True,否则返回 False:
def get_discounts(soup, low=65, high=99):
    """Report whether the page lists at least one discount inside a range.

    Every ``.cept-discount`` element is inspected; the function only answers
    ``False`` after all of them have been checked.

    Args:
        soup: Parsed page. It is queried with ``select``, so it must still be
            the BeautifulSoup tree, not the string returned by ``get_text()``.
        low: Smallest discount percentage that counts as a hit (inclusive).
        high: Largest discount percentage that counts as a hit (inclusive).

    Returns:
        True if any discount badge holds a percentage between ``low`` and
        ``high``; False otherwise, including when the page has no badges.
    """
    for d in soup.select('.cept-discount'):
        # Keep only the digits, so "-49%" becomes "49".
        digits = ''.join(filter(str.isdigit, d.text))
        # Skip empty badges and labels without digits: int('') would raise.
        if digits and low <= int(digits) <= high:
            return True
    return False
get_discounts(soup)
注意:必须在执行 soup = soup.get_text() 之前调用该函数——顺序很重要,因为这一行会把 soup 的内容替换为纯文本。
更好的做法是把文本存到另一个变量中(例如 souptext),这样可以确保 soup 始终是 BeautifulSoup 对象,即以嵌套数据结构表示的文档。
最终的脚本大致如下:
import requests, time, difflib, os, re, schedule, cloudscraper
from bs4 import BeautifulSoup
from datetime import datetime
# Target URL; it is only used in the log output and in the Telegram message text.
# NOTE(review): the monitoring loop downloads "https://nl.pepper.com/nieuw",
# not this URL — confirm which page is meant to be monitored.
url = "https://nl.pepper.com/groep/prijsfout"
# Browser-like User-Agent headers, kept for reference but currently disabled.
#headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
# cloudscraper session that the monitoring loop uses to download the page.
scraper = cloudscraper.create_scraper()
# Send a message via a telegram bot
def telegram_bot_sendtext(bot_message):
    """Send ``bot_message`` to the configured Telegram chat via the Bot API.

    The message is passed through ``params`` so that ``requests`` URL-encodes
    it. Concatenating it into the query string by hand breaks as soon as the
    text contains newlines, ``&``, ``#`` or ``+`` (all common in diff output).

    Args:
        bot_message: Text of the message to send.

    Returns:
        The decoded JSON body of the Telegram API response.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from the
    # environment instead of keeping them in the source file.
    bot_token = '17XXXX32:AAFd5jXXXXXXXXXXXXC5UJgG5pses8'
    bot_chatID = '-XXXXX'
    api_url = 'https://api.telegram.org/bot' + bot_token + '/sendMessage'
    # NOTE(review): with parse_mode=Markdown, characters such as '*' or '_' in
    # the text are interpreted as markup — confirm this is wanted for diffs.
    payload = {
        'chat_id': bot_chatID,
        'parse_mode': 'Markdown',
        'text': bot_message,
    }
    response = requests.get(api_url, params=payload)
    return response.json()
# Monitoring state: text of the previously seen page, and a flag marking the
# very first pass, when there is nothing to compare against yet.
PrevVersion = ""
FirstRun = True
while True:
    # download the page
    # NOTE(review): this fetches /nieuw while `url` (used in the messages
    # below) may point to a different page — confirm which one is intended.
    response = scraper.get("https://nl.pepper.com/nieuw").content
    # parse the downloaded homepage
    soup = BeautifulSoup(response, 'html.parser')
    # remove all scripts and styles so only the visible text is compared
    for script in soup(["script", "style"]):
        script.extract()
    # get_discounts() must be defined before this loop, and it has to run
    # while `soup` is still the parsed tree, i.e. before get_text().
    discounts = get_discounts(soup)
    # keep the text in its own variable so `soup` stays a BeautifulSoup tree
    page_text = soup.get_text()
    if FirstRun:
        # Take the baseline on the very first pass whether or not a discount
        # is listed. If this were gated on `discounts`, the first qualifying
        # discount to appear later would be memorized instead of reported.
        PrevVersion = page_text
        FirstRun = False
        print("Start Monitoring " + url + " " + str(datetime.now()))
    elif PrevVersion != page_text and discounts:
        # the page changed and at least one discount is in the wanted range
        print("Changes detected at: " + str(datetime.now()))
        OldPage = PrevVersion.splitlines()
        NewPage = page_text.splitlines()
        # compare versions and highlight changes using difflib
        diff = difflib.context_diff(OldPage, NewPage, n=0)
        out_text = "\n".join([ll.rstrip() for ll in '\n'.join(diff).splitlines() if ll.strip()])
        print(out_text)
        # Send the message via the telegram bot; the newline keeps the URL
        # from running into the first line of the diff.
        telegram_bot_sendtext("Nieuwe prijsfout op Pepper " + url + "\n" + out_text)
        # the current text becomes the baseline for the next comparison
        PrevVersion = page_text
    else:
        print("No Changes " + str(datetime.now()))
    # wait before polling the page again
    time.sleep(5)