抓取时出现错误:实例方法没有属性 '__getitem__'
While scraping, I get the error: instance method has no attribute '__getitem__'
我不明白为什么会出现这个错误:实例方法(instancemethod)没有属性 '__getitem__'。
我只是想抓取这个网站来提取部门名称。
import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from urlparse import urljoin
from amazon.items import AmazonItem
class delhiveryspider(CrawlSpider):
    """Spider that collects department names from the amazon.in navigation flyout."""

    name = "amazon"
    allowed_domains = ["amazon.in"]
    start_urls = ["http://www.amazon.in"]

    def parse(self, response):
        """Build one AmazonItem per ``nav_browse_flyout`` div found in *response*.

        Returns the list of populated items.
        """
        sites = response.xpath('//div[@id="nav_browse_flyout"]')
        items = []
        for site in sites:
            item = AmazonItem()
            # NOTE: this is the line that raises the reported error -- ``extract``
            # is subscripted as a bound method instead of being called first.
            item['main_title'] = site.xpath('.//li[@id="nav_cat_0"]/text()').extract[0]
            items.append(item)
        return items
您需要先调用 extract()(注意带上括号),
然后再取结果列表的第一项:
# extract is a method: call it with () first, then index its result with [0].
item['main_title'] = site.xpath('.//li[@id="nav_cat_0"]/text()').extract()[0]
如果您希望每个项目都有一个单独的类别,请迭代它们:
# Create a separate item for every <li> whose id starts with "nav_cat_".
for title in site.xpath('.//li[starts-with(@id, "nav_cat_")]/text()').extract():
    item = AmazonItem()
    item['main_title'] = title
    items.append(item)
我不明白为什么会出现这个错误:实例方法(instancemethod)没有属性 '__getitem__'。我只是想抓取这个网站来提取部门名称。
import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from urlparse import urljoin
from amazon.items import AmazonItem
class delhiveryspider(CrawlSpider):
    """Spider that collects department names from the amazon.in navigation flyout."""

    name = "amazon"
    allowed_domains = ["amazon.in"]
    start_urls = ["http://www.amazon.in"]

    def parse(self, response):
        """Build one AmazonItem per ``nav_browse_flyout`` div found in *response*.

        Returns the list of populated items.
        """
        sites = response.xpath('//div[@id="nav_browse_flyout"]')
        items = []
        for site in sites:
            item = AmazonItem()
            # NOTE: this is the line that raises the reported error -- ``extract``
            # is subscripted as a bound method instead of being called first.
            item['main_title'] = site.xpath('.//li[@id="nav_cat_0"]/text()').extract[0]
            items.append(item)
        return items
您需要先调用 extract()(注意带上括号),
然后再取结果列表的第一项:
# extract is a method: call it with () first, then index its result with [0].
item['main_title'] = site.xpath('.//li[@id="nav_cat_0"]/text()').extract()[0]
如果您希望每个项目都有一个单独的类别,请迭代它们:
# Create a separate item for every <li> whose id starts with "nav_cat_".
for title in site.xpath('.//li[starts-with(@id, "nav_cat_")]/text()').extract():
    item = AmazonItem()
    item['main_title'] = title
    items.append(item)