将抓取的结果集保存到 CSV 文件中
Saving scraped result set into a CSV file
我写了一个小脚本,它抓取 eBay 的搜索结果集,并将每个字段分别存储在不同的变量中:链接、价格、出价次数。
如何把这些变量中的结果保存到 CSV 文件中,并让每一行对应一个不同的拍卖品?
例如,每一行的格式为:链接、价格、出价次数
到目前为止,这是我的代码:
import requests, bs4
import csv
import requests
import pandas as pd
# Fetch the eBay search-results page; the query string restricts the search
# to completed, sold auction listings (LH_Complete, LH_Sold, LH_Auction).
res = requests.get('http://www.ebay.com/sch/i.html?LH_Complete=1&LH_Sold=1&_from=R40&_sacat=0&_nkw=gerald%20ford%20autograph&rt=nc&LH_Auction=1&_trksid=p2045573.m1684')
# Raise on an HTTP error status instead of parsing an error page.
res.raise_for_status()
# Name the parser explicitly: without it bs4 emits a warning and uses
# whichever parser happens to be installed, so results can differ per machine.
soup = bs4.BeautifulSoup(res.text, "html.parser")
# grabs the link, selling price, and # of bids from historical auctions
# NOTE: each result is a list of bs4 Tag objects, not plain strings.
links = soup.find_all(class_="vip")
prices = soup.find_all("span", "bold bidsold")
bids = soup.find_all("li", "lvformat")
import requests, bs4
import numpy as np
import requests
import pandas as pd
# Fetch the eBay search-results page for completed, sold auction listings.
# The URL must not contain spaces: stray blanks inside the query string
# corrupt the parameters that are sent to the server.
res = requests.get('http://www.ebay.com/sch/i.html?LH_Complete=1&LH_Sold=1&_from=R40&_sacat=0&_nkw=gerald%20ford%20autograph&rt=nc&LH_Auction=1&_trksid=p2045573.m1684')
# Raise on an HTTP error status instead of parsing an error page.
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text, "lxml")
# grabs the link, selling price, and # of bids from historical auctions
# Store plain values (href / visible text) rather than the Tag objects
# themselves, so the CSV contains data instead of HTML markup.
links = [tag.get('href') for tag in soup.find_all(class_="vip")]
bids = [tag.get_text(strip=True) for tag in soup.find_all("li", "lvformat")]
prices = [tag.get_text(strip=True) for tag in soup.find_all("span", "bold bidsold")]
# One row per auction. zip() stops at the shortest list, so a missing field
# at the end cannot produce a ragged table.
rows = list(zip(links, bids, prices))
df = pd.DataFrame(rows, columns=['link', 'bids', 'price'])
df.to_csv('/Users/toasteez/ebay.csv')
这应该可以完成工作:
import csv
import requests
import bs4
# Fetch the eBay search-results page for completed, sold auction listings.
res = requests.get('http://www.ebay.com/sch/i.html?LH_Complete=1&LH_Sold=1&_from=R40&_sacat=0&_nkw=gerald%20ford%20autograph&rt=nc&LH_Auction=1&_trksid=p2045573.m1684')
# Raise on an HTTP error status instead of parsing an error page.
res.raise_for_status()
# Name the parser explicitly: without it bs4 emits a warning and uses
# whichever parser happens to be installed, so results can differ per machine.
soup = bs4.BeautifulSoup(res.text, "html.parser")
# grab all the links and store their href destinations in a list
links = [e['href'] for e in soup.find_all(class_="vip")]
# grab all the bid spans and split their contents in order to get the number only
bids = [e.span.contents[0].split(' ')[0] for e in soup.find_all("li", "lvformat")]
# grab all the prices and store those in a list
prices = [e.contents[0] for e in soup.find_all("span", "bold bidsold")]
# zip the lists generated above so that each row combines the link, price and
# bid count that belong to the same auction (zip stops at the shortest list)
rows = list(zip(links, prices, bids))
# write every row to the csv output file; newline='' lets the csv module
# control line endings (otherwise blank rows appear between records on Windows)
with open('ebay.csv', 'w', newline='', encoding='utf-8') as csvfile:
    w = csv.writer(csvfile)
    w.writerows(rows)
因此,您将获得一个以“,”(逗号)作为分隔符的 CSV 文件。
我写了一个小脚本,它抓取 eBay 的搜索结果集,并将每个字段分别存储在不同的变量中:链接、价格、出价次数。
如何把这些变量中的结果保存到 CSV 文件中,并让每一行对应一个不同的拍卖品?
例如,每一行的格式为:链接、价格、出价次数
到目前为止,这是我的代码:
import requests, bs4
import csv
import requests
import pandas as pd
# Fetch the eBay search-results page; the query string restricts the search
# to completed, sold auction listings (LH_Complete, LH_Sold, LH_Auction).
res = requests.get('http://www.ebay.com/sch/i.html?LH_Complete=1&LH_Sold=1&_from=R40&_sacat=0&_nkw=gerald%20ford%20autograph&rt=nc&LH_Auction=1&_trksid=p2045573.m1684')
# Raise on an HTTP error status instead of parsing an error page.
res.raise_for_status()
# Name the parser explicitly: without it bs4 emits a warning and uses
# whichever parser happens to be installed, so results can differ per machine.
soup = bs4.BeautifulSoup(res.text, "html.parser")
# grabs the link, selling price, and # of bids from historical auctions
# NOTE: each result is a list of bs4 Tag objects, not plain strings.
links = soup.find_all(class_="vip")
prices = soup.find_all("span", "bold bidsold")
bids = soup.find_all("li", "lvformat")
import requests, bs4
import numpy as np
import requests
import pandas as pd
# Fetch the eBay search-results page for completed, sold auction listings.
# The URL must not contain spaces: stray blanks inside the query string
# corrupt the parameters that are sent to the server.
res = requests.get('http://www.ebay.com/sch/i.html?LH_Complete=1&LH_Sold=1&_from=R40&_sacat=0&_nkw=gerald%20ford%20autograph&rt=nc&LH_Auction=1&_trksid=p2045573.m1684')
# Raise on an HTTP error status instead of parsing an error page.
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text, "lxml")
# grabs the link, selling price, and # of bids from historical auctions
# Store plain values (href / visible text) rather than the Tag objects
# themselves, so the CSV contains data instead of HTML markup.
links = [tag.get('href') for tag in soup.find_all(class_="vip")]
bids = [tag.get_text(strip=True) for tag in soup.find_all("li", "lvformat")]
prices = [tag.get_text(strip=True) for tag in soup.find_all("span", "bold bidsold")]
# One row per auction. zip() stops at the shortest list, so a missing field
# at the end cannot produce a ragged table.
rows = list(zip(links, bids, prices))
df = pd.DataFrame(rows, columns=['link', 'bids', 'price'])
df.to_csv('/Users/toasteez/ebay.csv')
这应该可以完成工作:
import csv
import requests
import bs4
# Fetch the eBay search-results page for completed, sold auction listings.
res = requests.get('http://www.ebay.com/sch/i.html?LH_Complete=1&LH_Sold=1&_from=R40&_sacat=0&_nkw=gerald%20ford%20autograph&rt=nc&LH_Auction=1&_trksid=p2045573.m1684')
# Raise on an HTTP error status instead of parsing an error page.
res.raise_for_status()
# Name the parser explicitly: without it bs4 emits a warning and uses
# whichever parser happens to be installed, so results can differ per machine.
soup = bs4.BeautifulSoup(res.text, "html.parser")
# grab all the links and store their href destinations in a list
links = [e['href'] for e in soup.find_all(class_="vip")]
# grab all the bid spans and split their contents in order to get the number only
bids = [e.span.contents[0].split(' ')[0] for e in soup.find_all("li", "lvformat")]
# grab all the prices and store those in a list
prices = [e.contents[0] for e in soup.find_all("span", "bold bidsold")]
# zip the lists generated above so that each row combines the link, price and
# bid count that belong to the same auction (zip stops at the shortest list)
rows = list(zip(links, prices, bids))
# write every row to the csv output file; newline='' lets the csv module
# control line endings (otherwise blank rows appear between records on Windows)
with open('ebay.csv', 'w', newline='', encoding='utf-8') as csvfile:
    w = csv.writer(csvfile)
    w.writerows(rows)
因此,您将获得一个以“,”(逗号)作为分隔符的 CSV 文件。