__init__() 得到了意外的关键字参数“_job”
__init__() got an unexpected keyword argument '_job'
我正在尝试将 scrapyd 与 scrapy 一起使用。
当我使用下面的代码时,它工作正常。
import scrapy
from scrapy.selector import Selector
from scrapy.spiders import CrawlSpider
from shikari.items import shikariItem
from scrapy.http import Request
class Radiate(scrapy.Spider) :
name = "run"
allowed_domains = ["google.com"]
start_urls = ['http://google.com']
def parse(self, response) :
item = shikariItem ()
sel = Selector (response)
item['url'] = response.url
return item
但是当我将它与 selenium 一起使用时,它不会
import scrapy
from pyvirtualdisplay import Display
from selenium import webdriver
from shikari.items import shikariItem
class Radiate(scrapy.Spider) :
name = "run"
allowed_domains = ["google.com"]
start_urls = ['http://google.com']
def __init__(self):
display = Display(visible=0, size=(800, 600))
display.start()
self.driver = webdriver.Chrome('/usr/bin/chromedriver')
def parse(self, response) :
item = shikariItem ()
item['url'] = self.driver.get(response.url)
return item
def teardown(self):
display.stop()
self.driver.close()
我的蜘蛛从不跑。在作业中,它列在已完成下方,在错误日志中我看到 exceptions.TypeError: __init__() got an unexpected keyword argument '_job'
.
这是完整的错误日志
2016-02-23 19:42:06 [scrapy] INFO: Scrapy 1.0.5 started (bot: scrapybot)
2016-02-23 19:42:06 [scrapy] INFO: Optional features available: ssl, http11
2016-02-23 19:42:06 [scrapy] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'shikari.spiders', 'FEED_URI': 'file:///var/www/html/shikari/scrapyd-client-master/items/shikari/run/82776010da6511e5b08d1288e0cebe8d.jl', 'CONCURRENT_REQUESTS_PER_DOMAIN': 1, 'SPIDER_MODULES': ['shikari.spiders'], 'USER_AGENT': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0', 'FEED_FORMAT': 'csv', 'LOG_FILE': 'logs/shikari/run/82776010da6511e5b08d1288e0cebe8d.log', 'DOWNLOAD_DELAY': 4}
2016-02-23 19:42:06 [scrapy] INFO: Enabled extensions: CloseSpider, FeedExporter, TelnetConsole, LogStats, CoreStats, SpiderState
2016-02-23 19:42:06 [twisted] ERROR: Unhandled error in Deferred:
2016-02-23 19:42:06 [twisted] ERROR: Unhandled Error
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/scrapy/cmdline.py", line 150, in _run_command
cmd.run(args, opts)
File "/usr/local/lib/python2.7/dist-packages/scrapy/commands/crawl.py", line 57, in run
self.crawler_process.crawl(spname, **opts.spargs)
File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 153, in crawl
d = crawler.crawl(*args, **kwargs)
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 1237, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
--- <exception caught here> ---
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 1099, in _inlineCallbacks
result = g.send(result)
File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 70, in crawl
self.spider = self._create_spider(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 80, in _create_spider
return self.spidercls.from_crawler(self, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/scrapy/spiders/__init__.py", line 50, in from_crawler
spider = cls(*args, **kwargs)
exceptions.TypeError: __init__() got an unexpected keyword argument '_job'
我需要做什么才能避免收到错误消息?
您在您的子类中重新定义了 __init__
并将其签名从 scrapy.Spider
更改了。您也没有调用 scrapy.Spider
的 __init__
方法。
scrapy.Spider
source for init
我正在尝试将 scrapyd 与 scrapy 一起使用。
当我使用下面的代码时,它工作正常。
import scrapy
from scrapy.selector import Selector
from scrapy.spiders import CrawlSpider
from shikari.items import shikariItem
from scrapy.http import Request
class Radiate(scrapy.Spider) :
name = "run"
allowed_domains = ["google.com"]
start_urls = ['http://google.com']
def parse(self, response) :
item = shikariItem ()
sel = Selector (response)
item['url'] = response.url
return item
但是当我将它与 selenium 一起使用时,它不会
import scrapy
from pyvirtualdisplay import Display
from selenium import webdriver
from shikari.items import shikariItem
class Radiate(scrapy.Spider) :
name = "run"
allowed_domains = ["google.com"]
start_urls = ['http://google.com']
def __init__(self):
display = Display(visible=0, size=(800, 600))
display.start()
self.driver = webdriver.Chrome('/usr/bin/chromedriver')
def parse(self, response) :
item = shikariItem ()
item['url'] = self.driver.get(response.url)
return item
def teardown(self):
display.stop()
self.driver.close()
我的蜘蛛从不跑。在作业中,它列在已完成下方,在错误日志中我看到 exceptions.TypeError: __init__() got an unexpected keyword argument '_job'
.
这是完整的错误日志
2016-02-23 19:42:06 [scrapy] INFO: Scrapy 1.0.5 started (bot: scrapybot)
2016-02-23 19:42:06 [scrapy] INFO: Optional features available: ssl, http11
2016-02-23 19:42:06 [scrapy] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'shikari.spiders', 'FEED_URI': 'file:///var/www/html/shikari/scrapyd-client-master/items/shikari/run/82776010da6511e5b08d1288e0cebe8d.jl', 'CONCURRENT_REQUESTS_PER_DOMAIN': 1, 'SPIDER_MODULES': ['shikari.spiders'], 'USER_AGENT': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0', 'FEED_FORMAT': 'csv', 'LOG_FILE': 'logs/shikari/run/82776010da6511e5b08d1288e0cebe8d.log', 'DOWNLOAD_DELAY': 4}
2016-02-23 19:42:06 [scrapy] INFO: Enabled extensions: CloseSpider, FeedExporter, TelnetConsole, LogStats, CoreStats, SpiderState
2016-02-23 19:42:06 [twisted] ERROR: Unhandled error in Deferred:
2016-02-23 19:42:06 [twisted] ERROR: Unhandled Error
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/scrapy/cmdline.py", line 150, in _run_command
cmd.run(args, opts)
File "/usr/local/lib/python2.7/dist-packages/scrapy/commands/crawl.py", line 57, in run
self.crawler_process.crawl(spname, **opts.spargs)
File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 153, in crawl
d = crawler.crawl(*args, **kwargs)
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 1237, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
--- <exception caught here> ---
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 1099, in _inlineCallbacks
result = g.send(result)
File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 70, in crawl
self.spider = self._create_spider(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 80, in _create_spider
return self.spidercls.from_crawler(self, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/scrapy/spiders/__init__.py", line 50, in from_crawler
spider = cls(*args, **kwargs)
exceptions.TypeError: __init__() got an unexpected keyword argument '_job'
我需要做什么才能避免收到错误消息?
您在您的子类中重新定义了 __init__
并将其签名从 scrapy.Spider
更改了。您也没有调用 scrapy.Spider
的 __init__
方法。
scrapy.Spider
source for init