将抓取的结果并排分类成一行
Classify scraped results side by side into a row
所以我正在使用 python/scrapy 从网页中抓取数据。网页基本上由包含各种信息的 15 个块组成。我的蜘蛛重复遍历每个块以抓取一些特定内容。我对结果的内容很满意,但对数据的呈现方式不满意。我希望属于一个块的所有抓取信息都显示在一行中。你会从下面的截图中看到,同一个区块的结果并没有并排呈现,这就是我想要的。
def parse(self, response):
    """Yield one single-key item per matched element, field by field.

    Each entry in the spec table is (outer CSS selector, item key,
    inner CSS selector, whether to take only the first match).
    Yield order: all addresses, then all areas, then the three price
    variants, then all links — exactly as the original six loops did.
    """
    field_specs = [
        ('span.dir', 'address', 'b::text', False),
        ('div.datos', 'area', 'i::text', False),
        ('div.opciones', 'price stable', 'span.eur::text', False),
        ('div.opciones', 'price drop', 'div.mp_pvpant.baja::text', False),
        ('div.opciones', 'price decreased', 'span.eur_m::text', False),
        ('div.datos', 'link', 'a::attr(href)', True),
    ]
    for outer_sel, key, inner_sel, first_only in field_specs:
        for node in response.css(outer_sel):
            hits = node.css(inner_sel)
            yield {key: hits.extract_first() if first_only else hits.extract()}
您可以将提取的值追加到列表中,等循环结束后再一次性生成该列表,就像这样
def parse(self, response):
    """Accumulate every match for a field into a list, then yield that
    list exactly once.

    Fix: the yield must come AFTER the append loop. Yielding inside the
    loop would emit the partially-filled list on every iteration,
    producing duplicate, growing items instead of one row per field.
    """
    # create more lists for the remaining fields
    address = []
    area = []
    for node in response.css('span.dir'):
        address.append(node.css('b::text').extract())
    yield {'address': address}
    for node in response.css('div.datos'):
        area.append(node.css('i::text').extract())
    yield {'area': area}
如果每行的结果数量相同,您可以这样做:
def parse(self, response):
    """Yield one combined item per listing block by zipping the
    per-field columns together.

    Improvement over the original: `response.css('div.opciones')` was
    queried three separate times and `response.css('div.datos')` twice;
    each selector is now run once and the node list reused. Output is
    unchanged.

    NOTE(review): zip() truncates to the shortest column, so this only
    pairs rows correctly when every selector matches once per block —
    the answer states that precondition explicitly.
    """
    addresses = [n.css('b::text').extract() for n in response.css('span.dir')]

    datos_nodes = response.css('div.datos')
    areas = [n.css('i::text').extract() for n in datos_nodes]
    links = [n.css('a::attr(href)').extract_first() for n in datos_nodes]

    opciones_nodes = response.css('div.opciones')
    prices_stable = [n.css('span.eur::text').extract() for n in opciones_nodes]
    prices_drop = [n.css('div.mp_pvpant.baja::text').extract() for n in opciones_nodes]
    prices_decreased = [n.css('span.eur_m::text').extract() for n in opciones_nodes]

    for address, area, price_stable, price_drop, price_decreased, link in zip(
            addresses, areas, prices_stable, prices_drop, prices_decreased, links):
        yield {
            'address': address,
            'area': area,
            'price_stable': price_stable,
            'price_drop': price_drop,
            'price_decreased': price_decreased,
            'link': link,
        }
所以我正在使用 python/scrapy 从网页中抓取数据。网页基本上由包含各种信息的 15 个块组成。我的蜘蛛重复遍历每个块以抓取一些特定内容。我对结果的内容很满意,但对数据的呈现方式不满意。我希望属于一个块的所有抓取信息都显示在一行中。你会从下面的截图中看到,同一个区块的结果并没有并排呈现,这就是我想要的。
def parse(self, response):
    """Scrape each field independently, yielding a single-key item for
    every matched element.

    Emits, in order: one 'address' item per span.dir, one 'area' item
    per div.datos, three price variants per div.opciones, and one
    'link' item per div.datos.
    """
    for dir_node in response.css('span.dir'):
        yield {'address': dir_node.css('b::text').extract()}

    for datos_node in response.css('div.datos'):
        yield {'area': datos_node.css('i::text').extract()}

    for opciones_node in response.css('div.opciones'):
        yield {'price stable': opciones_node.css('span.eur::text').extract()}

    for opciones_node in response.css('div.opciones'):
        yield {'price drop': opciones_node.css('div.mp_pvpant.baja::text').extract()}

    for opciones_node in response.css('div.opciones'):
        yield {'price decreased': opciones_node.css('span.eur_m::text').extract()}

    for datos_node in response.css('div.datos'):
        yield {'link': datos_node.css('a::attr(href)').extract_first()}
您可以将提取的值追加到列表中,等循环结束后再一次性生成该列表,就像这样
def parse(self, response):
    """Collect all matches for a field into one list, then yield it once.

    Fix: each yield is placed after its accumulation loop. A yield
    inside the loop would re-emit the half-built list on every
    iteration, duplicating rows in the output feed.
    """
    # create more lists for the remaining fields
    address = []
    area = []
    for node in response.css('span.dir'):
        address.append(node.css('b::text').extract())
    yield {'address': address}
    for node in response.css('div.datos'):
        area.append(node.css('i::text').extract())
    yield {'area': area}
如果每行的结果数量相同,您可以这样做:
def parse(self, response):
    """Zip the per-field columns into one combined item per listing block.

    Improvement over the original: the same selectors were re-run
    ('div.opciones' three times, 'div.datos' twice); each is now
    queried once and its node list reused for every field it supplies.
    Output is unchanged.

    NOTE(review): zip() stops at the shortest column — correct pairing
    requires every selector to match once per block, as the answer's
    precondition states.
    """
    addresses = [n.css('b::text').extract() for n in response.css('span.dir')]

    datos_nodes = response.css('div.datos')
    areas = [n.css('i::text').extract() for n in datos_nodes]
    links = [n.css('a::attr(href)').extract_first() for n in datos_nodes]

    opciones_nodes = response.css('div.opciones')
    prices_stable = [n.css('span.eur::text').extract() for n in opciones_nodes]
    prices_drop = [n.css('div.mp_pvpant.baja::text').extract() for n in opciones_nodes]
    prices_decreased = [n.css('span.eur_m::text').extract() for n in opciones_nodes]

    for address, area, price_stable, price_drop, price_decreased, link in zip(
            addresses, areas, prices_stable, prices_drop, prices_decreased, links):
        yield {
            'address': address,
            'area': area,
            'price_stable': price_stable,
            'price_drop': price_drop,
            'price_decreased': price_decreased,
            'link': link,
        }