使用 Python 时 PhantomJS 与 eventlet GreenPile 崩溃

PhantomJS and eventlet GreenPile crash using Python

我正在尝试运行多个 PhantomJS 实例,并在绿色线程之间复用同一个驱动程序,而不是一次又一次地销毁它再重新创建进程:

import sys
from datetime import datetime
import eventlet
from helpers import log, make_request
import settings
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import redis
import lxml.html as html

# Green-thread pool whose size is taken from settings.max_threads.
pool = eventlet.GreenPool(settings.max_threads)
# Pile bound to that pool; pile.spawn() schedules callables on the pool.
pile = eventlet.GreenPile(pool)
# NOTE(review): this rebinds the name `redis` from the imported module to a
# client instance, so the module is no longer reachable under that name
# after this line.
redis = redis.StrictRedis(host='localhost', port=6379, db=0)

def begin_crawl_phantomJSV2():
    """Start one crawl chain backed by a single PhantomJS driver.

    Pops a URL from the Redis set ``queue``. When the set is empty the
    function returns without launching a browser; otherwise it creates one
    PhantomJS driver and hands it, together with the URL, to
    ``process_urlv2``.
    """
    url = redis.spop("queue")
    if url is None:
        # Nothing queued: return before paying for a PhantomJS launch.
        return

    driver = webdriver.PhantomJS()
    process_urlv2(driver, url)

def process_urlv2(driver, url):
    """Load ``url`` in ``driver``, then continue with the next queued URL.

    After the page is loaded, another URL is popped from the Redis set
    ``queue``. When the set is empty the driver is closed and quit;
    otherwise the same driver is reused for the next URL.
    """

    driver.get(url)

    ## some work

    url = redis.spop("queue")
    if None == url:
        driver.close()
        driver.quit()
        return

    # NOTE(review): this expression calls process_urlv2(driver, url) right
    # here and passes its return value (None) to pile.spawn, so the work
    # recurses synchronously and spawn receives a non-callable. To schedule
    # the call on the pool, pass the function and its arguments separately:
    #     pile.spawn(process_urlv2, driver, url)
    pile.spawn(process_urlv2(driver, url))


if __name__ == '__main__':
    # Script entry point: seed the Redis queue, start the crawl and block
    # until every green thread in the pool has finished.
    timea = datetime.now()
    log("Beginning crawl at {}".format(timea))
    redis.sadd("queue", "http://linka.com")
    redis.sadd("queue", "http://linkb.com")

    # A plain loop instead of a list comprehension: spawn() is called only
    # for its side effect, so there is no list worth building.
    for _ in range(1):
        pile.spawn(begin_crawl_phantomJSV2)
    pool.waitall()

并出现以下错误:

Traceback (most recent call last):
  File "C:\Python27\lib\site-packages\eventlet\hubs\hub.py", line 457, in fire_timers
    timer()
  File "C:\Python27\lib\site-packages\eventlet\hubs\timer.py", line 58, in __call__
    cb(*args, **kw)
  File "C:\Python27\lib\site-packages\eventlet\greenthread.py", line 214, in main
    result = function(*args, **kwargs)
TypeError: 'NoneType' object is not callable

把方法改成下面这样时:

def begin_crawl_phantomJS():
    """Crawl one queued URL with a fresh PhantomJS driver, then reschedule.

    Pops a URL from the Redis set ``queue``. When the set is empty the
    function returns without launching a browser. Otherwise it starts a
    PhantomJS driver, processes the URL, shuts the driver down and spawns
    itself again on the pile to handle the next URL.
    """
    # Check the queue before launching PhantomJS, so an empty queue never
    # leaves a browser process behind.
    url = redis.spop("queue")
    if url is None:
        return

    driver = webdriver.PhantomJS()
    try:
        process_url(driver, url)
    finally:
        # quit() closes every window and ends the session; running it in
        # `finally` releases the browser even when process_url raises.
        driver.quit()
    pile.spawn(begin_crawl_phantomJS)

def process_url(driver, url):
    """Navigate ``driver`` to ``url``.

    The page-specific scraping steps are elided in this example.
    """
    driver.get(url)
    # ... page-specific work goes here ...

它工作得很好,但这样会把时间浪费在反复启动 PhantomJS 进程上。你知道我应该怎么做吗?

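需要把

pile.spawn(process_urlv2(driver, url))

改为

pile.spawn(process_urlv2, driver, url)

原来的写法会先立即调用 process_urlv2(driver, url),再把它的返回值 None 传给 spawn,因此报出 TypeError: 'NoneType' object is not callable;spawn 需要的是函数本身及其参数。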