调用 PySide.QtWebKit.QWebFrame 的 load 方法时提示参数类型不正确

PySide.QtWebKit.QWebFrame load method with incorrect parameters

我正在尝试使用下面这个 GitHub 仓库中提供的国际象棋在线对弈网站爬虫:

https://github.com/Rseiji/ChessCommentaryGeneration (这是我从原始仓库 fork 出来的副本)

它使用Python2和PyQt4,其模块QtWebKit4不再可用。

所以,我找到了这个 link:

我不是很了解(什么是sparta?),但我知道有一个名为PySide的库,它有一个可以使用的模块QtWebKit。

所以我尝试修改爬虫的代码,简单地更改import

import sys  
from PyQt4.QtGui import *  
from PyQt4.QtCore import *  
#from PyQt4.QtWebKit import *  
from PySide.QtWebKit import *
from lxml import html 
import pickle
import time
from PyQt4 import QtGui, QtCore
import functools
import sys


import argparse
def parseArguments():
   """Parse the integer command-line options ``-i`` and ``-num``.

   Returns the namespace produced by ``ArgumentParser.parse_args()``; it
   exposes the attributes ``i`` and ``num`` (each ``None`` when omitted).
   """
   cli = argparse.ArgumentParser()
   # The "-typ" (home or subsequent) option is currently disabled.
   for flag in ("i", "num"):
      cli.add_argument("-" + flag, type=int, dest=flag, help=flag)
   return cli.parse_args()
# Parse the command line once at module level; save_all() reads params.i and
# params.num from this namespace.
params = parseArguments()
#typ = params.typ


#Take this class for granted. Just use the result of rendering.
class Render(QWebPage):  
 """Load ``url`` in a QWebPage and run the Qt event loop until loading ends.

 NOTE(review): with the star imports at the top of this listing, QWebPage
 comes from PySide.QtWebKit while QApplication and QUrl presumably come
 from PyQt4 -- so a PyQt4 QUrl is handed to a PySide QWebFrame.load.
 """
 def __init__(self, url):  
   """Create the application, schedule the load of ``url`` and block in exec_()."""
   self.app = QApplication(sys.argv)  
   QWebPage.__init__(self)  
   # _loadFinished stops the event loop once the page reports it is done.
   self.loadFinished.connect(self._loadFinished)  
   qurl = QUrl(url)
   # Bind the URL now; the actual load() call is made by the timer below.
   func = functools.partial(self.mainFrame().load, qurl )  
   # The timer is started with a 10000 ms interval and is never set to
   # single-shot, so load() is invoked on every timeout while exec_() runs.
   timer = QtCore.QTimer()
   timer.timeout.connect(func)
   timer.start(10000)
   # Blocks here until self.app.quit() is called.
   self.app.exec_()  

 def _loadFinished(self, result):  
   """Slot for loadFinished: keep the main frame and leave the event loop.

   ``result`` is the value delivered by the signal; it is not used here.
   """
   self.frame = self.mainFrame()  
   self.app.quit()  

def save_all():
   """Render one saved link and write its HTML under ./saved_files/.

   The link is ``all_links[params.i]`` from the pickled list; when
   ``params.num`` is not 0, ``"&pg=<num>"`` is appended to it. The rendered
   HTML goes to ``saved<i>.html`` (num == 0) or ``saved<i>_<num>.html``.
   Sets the module globals ``cur_url`` and ``html_doc``. Any exception raised
   inside the ``try`` block is caught and only reported on stdout.
   """
   global cur_url
   global html_doc
   all_links = pickle.load( open("./saved_files/saved_links.p", "r") )
   #extra_links = pickle.load( open("extra_pages.p", "r") )
   print "len(all_links) = ",len(all_links)
   # This value is overwritten by params.num below before it is ever read.
   num = sys.argv[1]

   i = params.i
   # Prints the type of i, not its value.
   print "i = ",type(i)
   num = params.num
   url = all_links[i]
   if num!=0:
       url+="&pg="+str(num)
   print "i, url = ",i,url
   #This step is important. Converting QString to Ascii for lxml to process
   #archive_links = html.fromstring(str(result.toAscii()))

   cur_url = url
   # Local counter: it starts at 0 on every call, so it can only reach 1.
   error_count = 0
   try:
       # Render() blocks until the page has finished loading.
       r = Render(cur_url)
       result = r.frame.toHtml()
       html_doc = result.toAscii()

       if num==0:
           fw = open("./saved_files/saved"+str(i)+".html", "w")
       else:
           fw = open("./saved_files/saved"+str(i)+"_" + str(num) + ".html", "w")
       fw.write(html_doc)
       fw.close()
       print "---- SLEEPING ---- "
       time.sleep(10)
   except:
       # Bare except: every failure above ends up here and is only printed.
       print "ERROR!!"
       error_count+=1
       print "error_count = ",error_count
   ##if i>4:
   ##  break

# Run the scraper only when this file is executed as a script.
if __name__=="__main__":
   save_all()

之前用 `python run_all.py 0 11577 1` 执行代码时,报错与 QtWebKit 模块有关;而现在我得到的是:

TypeError: 'PySide.QtWebKit.QWebFrame.load' called with wrong argument types:
  PySide.QtWebKit.QWebFrame.load(QUrl)
Supported signatures:
  PySide.QtWebKit.QWebFrame.load(PySide.QtNetwork.QNetworkRequest, PySide.QtNetwork.QNetworkAccessManager.Operation = QNetworkAccessManager.GetOperation, PySide.QtCore.QByteArray = QByteArray())
  PySide.QtWebKit.QWebFrame.load(PySide.QtCore.QUrl)

这条报错信息没有指出具体是哪一行代码出的问题,并且会不断重复出现。

我能做什么?

谢谢!

虽然 PyQt4 和 PySide 都是 Qt4 的 Python 绑定,但两者的类型互不兼容,这正是出错的原因:你的代码把 PyQt4 的 QUrl 传给了 PySide 的 QWebFrame.load,所以即使报错信息里显示的签名都是 load(QUrl),类型仍然对不上。解决办法是只使用 PyQt4,或者只使用 PySide,不要两者混用。改用 PySide 后的代码如下:

import argparse
import functools
from lxml import html
import pickle
import sys
import time


# from PyQt4 import QtCore, QtGui, QtWebKit
from PySide import QtCore, QtGui, QtWebKit


def parseArguments():
    """Read the integer options ``-i`` and ``-num`` from the command line.

    Returns the ``argparse`` namespace with the attributes ``i`` and ``num``;
    an option that was not supplied is ``None``.
    """
    cli = argparse.ArgumentParser()
    # The "-typ" (home or subsequent) option is currently disabled.
    int_options = ("i", "num")
    for name in int_options:
        cli.add_argument("-%s" % name, type=int, dest=name, help=name)
    namespace = cli.parse_args()
    return namespace


# Parse the command line once at module level; save_all() reads params.i and
# params.num from this namespace.
params = parseArguments()
# typ = params.typ


# Take this class for granted. Just use the result of rendering.
class Render(QtWebKit.QWebPage):
    """Load a URL in an off-screen ``QWebPage`` and block until it is loaded.

    Constructing an instance runs the Qt event loop and only returns after
    the ``loadFinished`` signal has fired. Afterwards ``self.frame`` holds
    the page's main frame, e.g. for ``render.frame.toHtml()``.
    """

    def __init__(self, url, delay_ms=10000):
        """Schedule the load of ``url`` and run the event loop until it ends.

        Args:
            url: Address of the page to load (anything ``QUrl`` accepts).
            delay_ms: Milliseconds to wait before the load is issued.
        """
        # Qt allows only one QApplication per process, so reuse an existing
        # one instead of failing when a second Render is created.
        app = QtGui.QApplication.instance()
        if app is None:
            app = QtGui.QApplication(sys.argv)
        self.app = app
        QtWebKit.QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        qurl = QtCore.QUrl(url)
        func = functools.partial(self.mainFrame().load, qurl)
        # Single-shot: a repeating timer would call load() again on every
        # timeout and restart a page that needs longer than the interval.
        timer = QtCore.QTimer()
        timer.setSingleShot(True)
        timer.timeout.connect(func)
        timer.start(delay_ms)
        # Blocks until _loadFinished() calls quit(); `timer` stays referenced
        # by this frame for the whole time.
        self.app.exec_()

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: keep the main frame and stop the loop.

        Args:
            result: Value delivered by the signal; not used here.
        """
        self.frame = self.mainFrame()
        self.app.quit()


def save_all():
    """Render one saved link and write its HTML under ``./saved_files/``.

    The link is ``all_links[params.i]`` from the pickled list in
    ``./saved_files/saved_links.p``. When ``params.num`` is a non-zero page
    number, ``"&pg=<num>"`` is appended and the output file is
    ``saved<i>_<num>.html``; otherwise it is ``saved<i>.html``. An omitted
    ``-num`` (``None``) is treated like page 0.

    Sets the module globals ``cur_url`` (the URL that was fetched) and
    ``html_doc`` (the page as UTF-8 encoded bytes). Failures while rendering
    or writing are reported on stdout and do not propagate.
    """
    global cur_url
    global html_doc
    # NOTE: unpickling can execute arbitrary code -- only load a links file
    # that you created yourself.
    # Binary mode is required for pickle on Python 3 and harmless on Python 2.
    with open("./saved_files/saved_links.p", "rb") as links_file:
        all_links = pickle.load(links_file)
    print("len(all_links) = ", len(all_links))

    i = params.i
    print("i = ", type(i))
    num = params.num
    url = all_links[i]
    if num:
        url += "&pg=" + str(num)
    print("i, url = ", i, url)

    cur_url = url
    error_count = 0
    try:
        r = Render(cur_url)
        # PySide has no QString: toHtml() returns a Python text string, so
        # there is no toAscii(); encode explicitly and write the bytes.
        html_doc = r.frame.toHtml().encode("utf-8")

        if not num:
            out_path = "./saved_files/saved" + str(i) + ".html"
        else:
            out_path = "./saved_files/saved" + str(i) + "_" + str(num) + ".html"
        with open(out_path, "wb") as fw:
            fw.write(html_doc)
        print("---- SLEEPING ---- ")
        # Pause between requests.
        time.sleep(10)
    except Exception as exc:
        # Best effort: report the failure (including its cause) and carry on.
        print("ERROR!!")
        print(repr(exc))
        error_count += 1
        print("error_count = ", error_count)


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    save_all()