PySide.QtWebKit.QWebFrame 的 load 方法报错:参数类型不正确
PySide.QtWebKit.QWebFrame load method with incorrect parameters
我正在尝试使用下面这个 GitHub 仓库中提供的国际象棋对弈网站爬虫:
https://github.com/Rseiji/ChessCommentaryGeneration(我从原始仓库创建的一个分支)
它使用Python2和PyQt4,其模块QtWebKit4不再可用。
所以,我找到了这个 link:
我不是很了解(什么是sparta?),但我知道有一个名为PySide的库,它有一个可以使用的模块QtWebKit。
所以我尝试修改爬虫的代码,只是简单地更改了 import 行:
import sys
from PyQt4.QtGui import *
from PyQt4.QtCore import *
#from PyQt4.QtWebKit import *
from PySide.QtWebKit import *
from lxml import html
import pickle
import time
from PyQt4 import QtGui, QtCore
import functools
import sys
import argparse
def parseArguments():
    """Parse the ``-i`` and ``-num`` integer options from the command line.

    Returns:
        The ``argparse`` namespace with attributes ``i`` and ``num``; each is
        ``None`` when the corresponding option is not given.
    """
    arg_parser = argparse.ArgumentParser()
    # Both options are plain integers whose help text is just their name.
    option_specs = (("-i", "i"), ("-num", "num"))
    for flag, name in option_specs:
        arg_parser.add_argument(flag, type=int, dest=name, help=name)
    return arg_parser.parse_args()
# Parsed once at import time; save_all() reads params.i and params.num.
params = parseArguments()
#typ = params.typ
# Take this class for granted. Just use the result of rendering.
class Render(QWebPage):
    """Load ``url`` in a web page and block until loading has finished.

    Constructing an instance runs the Qt event loop; once the constructor
    returns, ``self.frame`` is the page's main frame.

    NOTE: with the imports used by this listing, ``QUrl`` and
    ``QApplication`` come from PyQt4 while ``QWebPage`` comes from PySide.
    """

    def __init__(self, url):
        # The application object is created before the QWebPage base class
        # is initialised.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)

        target = QUrl(url)
        start_load = functools.partial(self.mainFrame().load, target)

        # Timer with a 10000 ms interval whose timeout triggers the load.
        load_timer = QtCore.QTimer()
        load_timer.timeout.connect(start_load)
        load_timer.start(10000)

        # Blocks here until _loadFinished() quits the application.
        self.app.exec_()

    def _loadFinished(self, result):
        """Remember the main frame and leave the event loop."""
        self.frame = self.mainFrame()
        self.app.quit()
def save_all():
global cur_url
global html_doc
all_links = pickle.load( open("./saved_files/saved_links.p", "r") )
#extra_links = pickle.load( open("extra_pages.p", "r") )
print "len(all_links) = ",len(all_links)
num = sys.argv[1]
i = params.i
print "i = ",type(i)
num = params.num
url = all_links[i]
if num!=0:
url+="&pg="+str(num)
print "i, url = ",i,url
#This step is important.Converting QString to Ascii for lxml to process
#archive_links = html.fromstring(str(result.toAscii()))
cur_url = url
error_count = 0
try:
r = Render(cur_url)
result = r.frame.toHtml()
html_doc = result.toAscii()
if num==0:
fw = open("./saved_files/saved"+str(i)+".html", "w")
else:
fw = open("./saved_files/saved"+str(i)+"_" + str(num) + ".html", "w")
fw.write(html_doc)
fw.close()
print "---- SLEEPING ---- "
time.sleep(10)
except:
print "ERROR!!"
error_count+=1
print "error_count = ",error_count
##if i>4:
## break
# Script entry point: fetch and save one page when run directly.
if __name__ == "__main__":
    save_all()
之前,用 python run_all.py 0 11577 1 执行代码时,报错出在 QtWebKit 模块上;但现在,我得到的是:
TypeError: 'PySide.QtWebKit.QWebFrame.load' called with wrong argument types:
PySide.QtWebKit.QWebFrame.load(QUrl)
Supported signatures:
PySide.QtWebKit.QWebFrame.load(PySide.QtNetwork.QNetworkRequest, PySide.QtNetwork.QNetworkAccessManager.Operation = QNetworkAccessManager.GetOperation, PySide.QtCore.QByteArray = QByteArray())
PySide.QtWebKit.QWebFrame.load(PySide.QtCore.QUrl)
该错误信息没有指出出错的代码行,并且会不断重复出现。
我能做什么?
谢谢!
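虽然 PyQt4 和 PySide 都是 Qt4 的 Python 绑定,但它们彼此不兼容,这就是出错的原因:代码里的 QUrl 来自 PyQt4.QtCore,而 QWebFrame 来自 PySide,PySide 的 load() 不接受 PyQt4 的 QUrl 对象。解决方案是只使用 PyQt4 或只使用 PySide,而不要两者混用。在这种情况下,使用 PySide 的代码是: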
import argparse
import functools
from lxml import html
import pickle
import sys
import time
# from PyQt4 import QtCore, QtGui, QtWebKit
from PySide import QtCore, QtGui, QtWebKit
def parseArguments(argv=None):
    """Parse the ``-i`` and ``-num`` integer options.

    Args:
        argv: Optional list of argument strings to parse.  When ``None``
            (the default) the process command line is used, exactly as
            before.

    Returns:
        The ``argparse`` namespace with attributes ``i`` and ``num``; each is
        ``None`` when the corresponding option is not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", type=int, dest="i", help="i")
    parser.add_argument("-num", type=int, dest="num", help="num")
    return parser.parse_args(argv)
# Parsed once at import time; save_all() reads params.i and params.num.
params = parseArguments()
# typ = params.typ
# Take this class for granted. Just use the result of rendering.
class Render(QtWebKit.QWebPage):
    """Load a URL in a ``QWebPage`` and block until loading has finished.

    Constructing an instance runs the Qt event loop.  When the constructor
    returns, ``frame`` is the page's main frame (call ``frame.toHtml()`` for
    the markup) and ``load_ok`` is the flag that ``loadFinished`` reported.

    Args:
        url: Address of the page to load.
        delay_ms: Milliseconds to wait before the load is started.
    """

    def __init__(self, url, delay_ms=10000):
        # Reuse the application object if one already exists, so that more
        # than one Render can be created in the same process.
        self.app = QtGui.QApplication.instance() or QtGui.QApplication(sys.argv)
        QtWebKit.QWebPage.__init__(self)
        self.load_ok = False
        self.loadFinished.connect(self._loadFinished)
        qurl = QtCore.QUrl(url)
        func = functools.partial(self.mainFrame().load, qurl)
        timer = QtCore.QTimer()
        # Single-shot: a repeating timer would call load() again on every
        # tick and restart any page that takes longer than the interval.
        timer.setSingleShot(True)
        timer.timeout.connect(func)
        timer.start(delay_ms)
        # Blocks here until _loadFinished() quits the application.
        self.app.exec_()

    def _loadFinished(self, result):
        """Record the outcome, remember the main frame, leave the event loop."""
        self.load_ok = bool(result)
        self.frame = self.mainFrame()
        self.app.quit()
def save_all():
    """Render the link selected by ``-i``/``-num`` and save its HTML.

    The list of links is unpickled from ``./saved_files/saved_links.p`` and
    entry ``params.i`` is chosen.  When ``params.num`` is given and non-zero,
    ``&pg=<num>`` is appended to the URL and the output goes to
    ``./saved_files/saved<i>_<num>.html``; otherwise it goes to
    ``./saved_files/saved<i>.html``.

    Errors raised while rendering or writing are printed and counted
    instead of being propagated.
    """
    global cur_url
    global html_doc

    # NOTE: unpickling can execute arbitrary code; only load a links file
    # that you generated yourself.
    # Binary mode is what pickle needs on Python 3 and also works on Python 2.
    with open("./saved_files/saved_links.p", "rb") as links_file:
        all_links = pickle.load(links_file)
    print("len(all_links) = ", len(all_links))

    i = params.i
    print("i = ", type(i))
    num = params.num
    url = all_links[i]
    # An omitted -num is None; treat it like 0 rather than appending "&pg=None".
    if num:
        url += "&pg=" + str(num)
    print("i, url = ", i, url)

    cur_url = url
    error_count = 0
    try:
        r = Render(cur_url)
        result = r.frame.toHtml()
        # PySide returns a Python string from toHtml(), which has no
        # toAscii(); bindings that return a QString still take the first
        # branch.  Either way non-ASCII characters are replaced.
        if hasattr(result, "toAscii"):
            html_doc = bytes(result.toAscii())
        else:
            html_doc = result.encode("ascii", "replace")

        if num:
            out_path = "./saved_files/saved" + str(i) + "_" + str(num) + ".html"
        else:
            out_path = "./saved_files/saved" + str(i) + ".html"
        # html_doc is bytes, so write in binary mode; the with-block closes
        # the file even if the write fails.
        with open(out_path, "wb") as fw:
            fw.write(html_doc)

        print("---- SLEEPING ---- ")
        # Pause after a successful fetch (presumably to avoid hammering the
        # site -- confirm before changing).
        time.sleep(10)
    except Exception as exc:
        # Report what went wrong instead of hiding it behind a bare except.
        print("ERROR!!", exc)
        error_count += 1
        print("error_count = ", error_count)
# Script entry point: fetch and save one page when run directly.
if __name__ == "__main__":
    save_all()
我正在尝试使用下面这个 GitHub 仓库中提供的国际象棋对弈网站爬虫:
https://github.com/Rseiji/ChessCommentaryGeneration(我从原始仓库创建的一个分支)
它使用Python2和PyQt4,其模块QtWebKit4不再可用。
所以,我找到了这个 link:
我不是很了解(什么是sparta?),但我知道有一个名为PySide的库,它有一个可以使用的模块QtWebKit。
所以我尝试修改爬虫的代码,只是简单地更改了 import 行:
import sys
from PyQt4.QtGui import *
from PyQt4.QtCore import *
#from PyQt4.QtWebKit import *
from PySide.QtWebKit import *
from lxml import html
import pickle
import time
from PyQt4 import QtGui, QtCore
import functools
import sys
import argparse
def parseArguments():
    """Parse the ``-i`` and ``-num`` integer options from the command line.

    Returns:
        The ``argparse`` namespace with attributes ``i`` and ``num``; each is
        ``None`` when the corresponding option is not given.
    """
    arg_parser = argparse.ArgumentParser()
    # Both options are plain integers whose help text is just their name.
    option_specs = (("-i", "i"), ("-num", "num"))
    for flag, name in option_specs:
        arg_parser.add_argument(flag, type=int, dest=name, help=name)
    return arg_parser.parse_args()
# Parsed once at import time; save_all() reads params.i and params.num.
params = parseArguments()
#typ = params.typ
# Take this class for granted. Just use the result of rendering.
class Render(QWebPage):
    """Load ``url`` in a web page and block until loading has finished.

    Constructing an instance runs the Qt event loop; once the constructor
    returns, ``self.frame`` is the page's main frame.

    NOTE: with the imports used by this listing, ``QUrl`` and
    ``QApplication`` come from PyQt4 while ``QWebPage`` comes from PySide.
    """

    def __init__(self, url):
        # The application object is created before the QWebPage base class
        # is initialised.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)

        target = QUrl(url)
        start_load = functools.partial(self.mainFrame().load, target)

        # Timer with a 10000 ms interval whose timeout triggers the load.
        load_timer = QtCore.QTimer()
        load_timer.timeout.connect(start_load)
        load_timer.start(10000)

        # Blocks here until _loadFinished() quits the application.
        self.app.exec_()

    def _loadFinished(self, result):
        """Remember the main frame and leave the event loop."""
        self.frame = self.mainFrame()
        self.app.quit()
def save_all():
global cur_url
global html_doc
all_links = pickle.load( open("./saved_files/saved_links.p", "r") )
#extra_links = pickle.load( open("extra_pages.p", "r") )
print "len(all_links) = ",len(all_links)
num = sys.argv[1]
i = params.i
print "i = ",type(i)
num = params.num
url = all_links[i]
if num!=0:
url+="&pg="+str(num)
print "i, url = ",i,url
#This step is important.Converting QString to Ascii for lxml to process
#archive_links = html.fromstring(str(result.toAscii()))
cur_url = url
error_count = 0
try:
r = Render(cur_url)
result = r.frame.toHtml()
html_doc = result.toAscii()
if num==0:
fw = open("./saved_files/saved"+str(i)+".html", "w")
else:
fw = open("./saved_files/saved"+str(i)+"_" + str(num) + ".html", "w")
fw.write(html_doc)
fw.close()
print "---- SLEEPING ---- "
time.sleep(10)
except:
print "ERROR!!"
error_count+=1
print "error_count = ",error_count
##if i>4:
## break
# Script entry point: fetch and save one page when run directly.
if __name__ == "__main__":
    save_all()
之前,用 python run_all.py 0 11577 1 执行代码时,报错出在 QtWebKit 模块上;但现在,我得到的是:
TypeError: 'PySide.QtWebKit.QWebFrame.load' called with wrong argument types:
PySide.QtWebKit.QWebFrame.load(QUrl)
Supported signatures:
PySide.QtWebKit.QWebFrame.load(PySide.QtNetwork.QNetworkRequest, PySide.QtNetwork.QNetworkAccessManager.Operation = QNetworkAccessManager.GetOperation, PySide.QtCore.QByteArray = QByteArray())
PySide.QtWebKit.QWebFrame.load(PySide.QtCore.QUrl)
该错误信息没有指出出错的代码行,并且会不断重复出现。
我能做什么?
谢谢!
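虽然 PyQt4 和 PySide 都是 Qt4 的 Python 绑定,但它们彼此不兼容,这就是出错的原因:代码里的 QUrl 来自 PyQt4.QtCore,而 QWebFrame 来自 PySide,PySide 的 load() 不接受 PyQt4 的 QUrl 对象。解决方案是只使用 PyQt4 或只使用 PySide,而不要两者混用。在这种情况下,使用 PySide 的代码是: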
import argparse
import functools
from lxml import html
import pickle
import sys
import time
# from PyQt4 import QtCore, QtGui, QtWebKit
from PySide import QtCore, QtGui, QtWebKit
def parseArguments(argv=None):
    """Parse the ``-i`` and ``-num`` integer options.

    Args:
        argv: Optional list of argument strings to parse.  When ``None``
            (the default) the process command line is used, exactly as
            before.

    Returns:
        The ``argparse`` namespace with attributes ``i`` and ``num``; each is
        ``None`` when the corresponding option is not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", type=int, dest="i", help="i")
    parser.add_argument("-num", type=int, dest="num", help="num")
    return parser.parse_args(argv)
# Parsed once at import time; save_all() reads params.i and params.num.
params = parseArguments()
# typ = params.typ
# Take this class for granted. Just use the result of rendering.
class Render(QtWebKit.QWebPage):
    """Load a URL in a ``QWebPage`` and block until loading has finished.

    Constructing an instance runs the Qt event loop.  When the constructor
    returns, ``frame`` is the page's main frame (call ``frame.toHtml()`` for
    the markup) and ``load_ok`` is the flag that ``loadFinished`` reported.

    Args:
        url: Address of the page to load.
        delay_ms: Milliseconds to wait before the load is started.
    """

    def __init__(self, url, delay_ms=10000):
        # Reuse the application object if one already exists, so that more
        # than one Render can be created in the same process.
        self.app = QtGui.QApplication.instance() or QtGui.QApplication(sys.argv)
        QtWebKit.QWebPage.__init__(self)
        self.load_ok = False
        self.loadFinished.connect(self._loadFinished)
        qurl = QtCore.QUrl(url)
        func = functools.partial(self.mainFrame().load, qurl)
        timer = QtCore.QTimer()
        # Single-shot: a repeating timer would call load() again on every
        # tick and restart any page that takes longer than the interval.
        timer.setSingleShot(True)
        timer.timeout.connect(func)
        timer.start(delay_ms)
        # Blocks here until _loadFinished() quits the application.
        self.app.exec_()

    def _loadFinished(self, result):
        """Record the outcome, remember the main frame, leave the event loop."""
        self.load_ok = bool(result)
        self.frame = self.mainFrame()
        self.app.quit()
def save_all():
    """Render the link selected by ``-i``/``-num`` and save its HTML.

    The list of links is unpickled from ``./saved_files/saved_links.p`` and
    entry ``params.i`` is chosen.  When ``params.num`` is given and non-zero,
    ``&pg=<num>`` is appended to the URL and the output goes to
    ``./saved_files/saved<i>_<num>.html``; otherwise it goes to
    ``./saved_files/saved<i>.html``.

    Errors raised while rendering or writing are printed and counted
    instead of being propagated.
    """
    global cur_url
    global html_doc

    # NOTE: unpickling can execute arbitrary code; only load a links file
    # that you generated yourself.
    # Binary mode is what pickle needs on Python 3 and also works on Python 2.
    with open("./saved_files/saved_links.p", "rb") as links_file:
        all_links = pickle.load(links_file)
    print("len(all_links) = ", len(all_links))

    i = params.i
    print("i = ", type(i))
    num = params.num
    url = all_links[i]
    # An omitted -num is None; treat it like 0 rather than appending "&pg=None".
    if num:
        url += "&pg=" + str(num)
    print("i, url = ", i, url)

    cur_url = url
    error_count = 0
    try:
        r = Render(cur_url)
        result = r.frame.toHtml()
        # PySide returns a Python string from toHtml(), which has no
        # toAscii(); bindings that return a QString still take the first
        # branch.  Either way non-ASCII characters are replaced.
        if hasattr(result, "toAscii"):
            html_doc = bytes(result.toAscii())
        else:
            html_doc = result.encode("ascii", "replace")

        if num:
            out_path = "./saved_files/saved" + str(i) + "_" + str(num) + ".html"
        else:
            out_path = "./saved_files/saved" + str(i) + ".html"
        # html_doc is bytes, so write in binary mode; the with-block closes
        # the file even if the write fails.
        with open(out_path, "wb") as fw:
            fw.write(html_doc)

        print("---- SLEEPING ---- ")
        # Pause after a successful fetch (presumably to avoid hammering the
        # site -- confirm before changing).
        time.sleep(10)
    except Exception as exc:
        # Report what went wrong instead of hiding it behind a bare except.
        print("ERROR!!", exc)
        error_count += 1
        print("error_count = ", error_count)
# Script entry point: fetch and save one page when run directly.
if __name__ == "__main__":
    save_all()