Twisted weak reference to non-object (Python)
I'm currently building a scraper with Scrapy, and unfortunately it fails with the error log below. I tried running it with both CrawlerRunner and CrawlerProcess, but both versions fail. I've tried to figure out whether I'm using Twisted incorrectly, but I think I'm doing it right.
2018-04-18 23:55:46 [twisted] CRITICAL:
Traceback (most recent call last):
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/twisted/internet/defer.py", line 1386, in _inlineCallbacks
    result = g.send(result)
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/scrapy/crawler.py", line 79, in crawl
    self.spider = self._create_spider(*args, **kwargs)
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/scrapy/crawler.py", line 102, in _create_spider
    return self.spidercls.from_crawler(self, *args, **kwargs)
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/scrapy/spiders/__init__.py", line 52, in from_crawler
    spider._set_crawler(crawler)
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/scrapy/spiders/__init__.py", line 67, in _set_crawler
    crawler.signals.connect(self.close, signals.spider_closed)
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/scrapy/signalmanager.py", line 26, in connect
    return dispatcher.connect(receiver, signal, **kwargs)
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/pydispatch/dispatcher.py", line 130, in connect
    receiver = saferef.safeRef(receiver, onDelete=_removeReceiver)
  File "/home/flo/PycharmProjects/Tensortest/Car_Analysis/lib/python3.6/site-packages/pydispatch/saferef.py", line 32, in safeRef
    return weakref.ref(target, onDelete)
TypeError: cannot create weak reference to 'NoneType' object
My code looks like this:
import scrapy
from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.crawler import CrawlerProcess
from scrapy.utils.log import configure_logging
from classes import cars

LINKS = []
CARS = []


class AutoSpiderLinks(scrapy.Spider):
    name = "Auto_get_links"
    ROOT_URL = "https://www.somewebsite"
    global LINKS

    def geturls(self):
        # Build the list of paginated URLs to crawl.
        main_url = "https://www.somewebsite"
        target_url = []
        for x in range(1, 2):
            target_url.append(main_url + "&page=" + str(x))
            print(target_url[-1])
        return target_url

    def start_requests(self):
        urls = self.geturls()
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        important_divs = response.css('div.cldt-summary-titles').extract()
        AutoSpiderLinks.convert(self, important_divs)


def main():
    configure_logging()
    runner = CrawlerRunner()
    runner.crawl(AutoSpiderLinks)
    runner.crawl(DeepSpider)
    d = runner.join()
    d.addBoth(lambda _: reactor.stop())


if __name__ == '__main__':
    main()
Thanks for your help.
So I figured it out. Apparently you can't wire all of this up inside the main function the way I did; decorating main with @defer.inlineCallbacks, yielding the crawls, and starting the reactor directly solved the problem:
@defer.inlineCallbacks
def main():
    configure_logging()
    runner = CrawlerRunner()
    # Run the crawls one after another; each yield waits until that crawl finishes.
    yield runner.crawl(AutoSpiderLinks)
    yield runner.crawl(DeepSpider)
    # Stop the reactor once both crawls are done.
    reactor.stop()


if __name__ == '__main__':
    main()          # schedules the crawls
    reactor.run()   # the script blocks here until reactor.stop() is called
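For completeness: if you don't need this fine-grained control over the reactor, CrawlerProcess manages the Twisted reactor itself, so the reactor bookkeeping above goes away. Note that it runs the spiders in parallel rather than one after another. A minimal sketch, assuming AutoSpiderLinks and DeepSpider are importable as in the question:

from scrapy.crawler import CrawlerProcess

if __name__ == '__main__':
    # CrawlerProcess starts and stops the Twisted reactor on its own,
    # so no explicit reactor.run() / reactor.stop() calls are needed.
    process = CrawlerProcess()
    process.crawl(AutoSpiderLinks)
    process.crawl(DeepSpider)   # assumed to be defined elsewhere in the project
    process.start()             # blocks until all scheduled crawls have finished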