尝试抓取数据时返回空结果
Trying to scrape data returns an empty result
我正在尝试从这些页面中抓取数据，例如 http://www.finistere.cuma.fr/fiches/finistere-federation-des-cuma 。我想抓取的是图片中显示的数据。
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Follow the ``<h2>`` links on the start page and emit ``div`` text.

    ``parse`` schedules one request per ``//h2/a/@href`` link found on the
    page listed in ``start_urls``; ``parse_book`` yields a single item for
    each page fetched that way.
    """

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Schedule a request for every link found under an ``<h2>`` heading."""
        for href in response.xpath("//h2/a/@href").getall():
            # urljoin resolves the href against the URL of this response.
            yield Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Yield the text nodes sitting directly inside ``div[@class='adr']``."""
        # NOTE: ``/text()`` selects only text nodes that are direct children
        # of the div; text wrapped in nested elements is not included.
        address_nodes = response.xpath("//div[@class='adr']/text()")
        yield {'coordoness': address_nodes.getall()}
请阅读代码中的注释。
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Follow the ``<h2>`` links on the start page and emit cleaned ``div`` text.

    ``parse`` schedules one request per ``//h2/a/@href`` link found on the
    page listed in ``start_urls``; ``parse_book`` yields a single item for
    each page fetched that way.
    """

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Schedule a request for every link found under an ``<h2>`` heading."""
        for href in response.xpath("//h2/a/@href").getall():
            # urljoin resolves the href against the URL of this response.
            yield Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Yield the non-blank text found anywhere inside ``div[@class='adr']``.

        Yields:
            dict: ``{'coordoness': [...]}`` where the list holds the stripped,
            non-empty text nodes in the order the selector returned them.
        """
        # ``//text()`` walks every descendant text node, whereas ``/text()``
        # would return only the div's direct text children and miss text
        # wrapped in nested tags.
        raw_nodes = response.xpath("//div[@class='adr']//text()").getall()
        # Strip surrounding whitespace, then drop nodes that were
        # whitespace-only (e.g. indentation between tags).
        stripped = (node.strip() for node in raw_nodes)
        yield {'coordoness': [text for text in stripped if text]}
我正在尝试从这些页面中抓取数据，例如 http://www.finistere.cuma.fr/fiches/finistere-federation-des-cuma 。我想抓取的是图片中显示的数据。
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Follow the ``<h2>`` links on the start page and emit ``div`` text.

    ``parse`` schedules one request per ``//h2/a/@href`` link found on the
    page listed in ``start_urls``; ``parse_book`` yields a single item for
    each page fetched that way.
    """

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Schedule a request for every link found under an ``<h2>`` heading."""
        for href in response.xpath("//h2/a/@href").getall():
            # urljoin resolves the href against the URL of this response.
            yield Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Yield the text nodes sitting directly inside ``div[@class='adr']``."""
        # NOTE: ``/text()`` selects only text nodes that are direct children
        # of the div; text wrapped in nested elements is not included.
        address_nodes = response.xpath("//div[@class='adr']/text()")
        yield {'coordoness': address_nodes.getall()}
请阅读代码中的注释。
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Follow the ``<h2>`` links on the start page and emit cleaned ``div`` text.

    ``parse`` schedules one request per ``//h2/a/@href`` link found on the
    page listed in ``start_urls``; ``parse_book`` yields a single item for
    each page fetched that way.
    """

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Schedule a request for every link found under an ``<h2>`` heading."""
        for href in response.xpath("//h2/a/@href").getall():
            # urljoin resolves the href against the URL of this response.
            yield Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Yield the non-blank text found anywhere inside ``div[@class='adr']``.

        Yields:
            dict: ``{'coordoness': [...]}`` where the list holds the stripped,
            non-empty text nodes in the order the selector returned them.
        """
        # ``//text()`` walks every descendant text node, whereas ``/text()``
        # would return only the div's direct text children and miss text
        # wrapped in nested tags.
        raw_nodes = response.xpath("//div[@class='adr']//text()").getall()
        # Strip surrounding whitespace, then drop nodes that were
        # whitespace-only (e.g. indentation between tags).
        stripped = (node.strip() for node in raw_nodes)
        yield {'coordoness': [text for text in stripped if text]}