pythonanywhere 中的网页抓取不起作用
web scraping in pythonanywhere not working
我需要一个小网络爬虫的帮助,我已经编写并放入我的 pythonanywhere 帐户中,每天让它 运行 几次。
这是我的代码:
import requests
from bs4 import BeautifulSoup
import time
import random
list_all_results = []
for i in range(1, 3):
time.sleep(random.uniform(1.5, 2))
print("Scraping page " + str(i) + "/745")
try:
URL = "https://www.futbin.com/players?page=" + str(i)
platform = "pc"
cookies = {"platform": platform}
page = requests.get(URL, cookies=cookies)
soup = BeautifulSoup(page.content, "html.parser")
result_names = soup.find_all("a", attrs={"class": "player_name_players_table"})
result_ratings = soup.find_all(
"span",
attrs={"class": lambda r: r.startswith("form rating ut21") if r else False},
)
result_rarity = soup.find_all("td", {"class": "mobile-hide-table-col"})
result_prices_pc = soup.find_all(
"span", attrs={"class": "pc_color font-weight-bold"}
)
list_names = []
list_ratings = []
list_rarities = []
list_prices = []
for name in result_names:
list_names.append(name.text)
for rating in result_ratings:
list_ratings.append(rating.text)
for rarity in result_rarity:
list_rarities.append(rarity.text)
for price in result_prices_pc:
n = price.text.strip()
if "K" in n:
n2 = n.replace("K", "")
full_int = int(float(n2) * 1000)
list_prices.append(full_int)
elif "M" in n:
n2 = n.replace("M", "")
full_int = int(float(n2) * 1000000)
list_prices.append(full_int)
else:
list_prices.append(int(price.text.strip()))
int_list_length = len(list_names)
for i in range(0, int_list_length):
list_all_results.append(
tuple(
(list_names[i], list_ratings[i], list_rarities[i], list_prices[i])
)
)
with open("/home/exec85/scrape/pc.txt", "a") as f: # create new .txt file and write content to file
f.write(f"{list_all_results}")
except:
pass
print("FINISHED")
出于某种原因,我没有打印任何结果,所以我假设没有任何内容被抓取,也没有创建 .txt 文件。
即使我手动创建 .txt 文件,它也不会被填充。
运行 我本地机器上的脚本一切正常。
您的代码运行良好,但如果您希望代码在 PythonAnywhere 上运行,则需要 a paid account but you can reach this site list.
我需要一个小网络爬虫的帮助,我已经编写并放入我的 pythonanywhere 帐户中,每天让它 运行 几次。
这是我的代码:
import requests
from bs4 import BeautifulSoup
import time
import random
list_all_results = []
for i in range(1, 3):
time.sleep(random.uniform(1.5, 2))
print("Scraping page " + str(i) + "/745")
try:
URL = "https://www.futbin.com/players?page=" + str(i)
platform = "pc"
cookies = {"platform": platform}
page = requests.get(URL, cookies=cookies)
soup = BeautifulSoup(page.content, "html.parser")
result_names = soup.find_all("a", attrs={"class": "player_name_players_table"})
result_ratings = soup.find_all(
"span",
attrs={"class": lambda r: r.startswith("form rating ut21") if r else False},
)
result_rarity = soup.find_all("td", {"class": "mobile-hide-table-col"})
result_prices_pc = soup.find_all(
"span", attrs={"class": "pc_color font-weight-bold"}
)
list_names = []
list_ratings = []
list_rarities = []
list_prices = []
for name in result_names:
list_names.append(name.text)
for rating in result_ratings:
list_ratings.append(rating.text)
for rarity in result_rarity:
list_rarities.append(rarity.text)
for price in result_prices_pc:
n = price.text.strip()
if "K" in n:
n2 = n.replace("K", "")
full_int = int(float(n2) * 1000)
list_prices.append(full_int)
elif "M" in n:
n2 = n.replace("M", "")
full_int = int(float(n2) * 1000000)
list_prices.append(full_int)
else:
list_prices.append(int(price.text.strip()))
int_list_length = len(list_names)
for i in range(0, int_list_length):
list_all_results.append(
tuple(
(list_names[i], list_ratings[i], list_rarities[i], list_prices[i])
)
)
with open("/home/exec85/scrape/pc.txt", "a") as f: # create new .txt file and write content to file
f.write(f"{list_all_results}")
except:
pass
print("FINISHED")
出于某种原因,我没有打印任何结果,所以我假设没有任何内容被抓取,也没有创建 .txt 文件。 即使我手动创建 .txt 文件,它也不会被填充。
运行 我本地机器上的脚本一切正常。
您的代码运行良好,但如果您希望代码在 PythonAnywhere 上运行,则需要 a paid account but you can reach this site list.