无法使用 QUrl
Cannot use QUrl
我正在尝试学习使用 PyQt5 进行动态网页抓取。我参考的教程是针对 PyQt4 编写的,而 PyQt5 中有些库已经发生了变化。
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEnginePage
import bs4 as bs
import urllib.request
# Asker's original (failing) page wrapper, kept exactly as posted.
# NOTE(review): indentation was lost in this listing; the two defs below are
# presumably methods of Client.
class Client(QWebEnginePage):
# NOTE(review): `_init_` has single underscores, so Python does not treat it
# as the constructor; Client(url) then hands `url` to QWebEnginePage's own
# constructor, which matches the TypeError reported for this code.
def _init_(self, url):
self.app=QApplication(sys.argv)
# NOTE(review): QWebPage is not among the names imported by this snippet.
QWebPage._init_(self)
self.loadFinished.connect(self.on_page_load)
# NOTE(review): mainFrame() comes from the PyQt4 tutorial; confirm it exists
# on QWebEnginePage before relying on it.
self.mainFrame().load(QUrl(url))
self.app.exec_()
# Connected to loadFinished above; quits the QApplication.
def on_page_load(self):
self.app.quit()
# Script section: load the page through Client, then parse it with BeautifulSoup.
url='https://pythonprogramming.net/parsememcparseface/'
client_response=Client(url)
source=client_response.mainFrame().toHtml()
#sauce=urllib.request.urlopen('https://pythonprogramming.net/sitemap.xml').read()
# NOTE(review): `sauce` is only assigned in the commented-out line above, while
# the HTML fetched here is stored in `source`; confirm which one was meant.
soup=bs.BeautifulSoup(sauce,'xml')
# Look up the <p class="jstest"> element and print its text.
js_test=soup.find('p',class_='jstest')
print(js_test.text)
显示以下错误:
Traceback (most recent call last):
File "jsp.py", line 19, in <module>
client_response=Client(url)
TypeError: arguments did not match any overloaded call:
QWebEnginePage(parent: QObject = None): argument 1 has unexpected type 'str'
QWebEnginePage(QWebEngineProfile, parent: QObject = None): argument 1 has unexpected type 'str'
有人帮帮我!
您的代码有几个错误:
- `__init__` 前后必须各有 2 个下划线。
- `QWebEnginePage` 没有 `mainFrame()` 方法,现在必须直接在页面对象上调用 `load()`。
- 另一个变化是 `toHtml()` 不再是同步函数:它需要传入一个回调来接收 HTML。不过经过我的修改,`get_html()` 重新提供了同步的调用方式。
代码:
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl, pyqtSignal, QEventLoop
from PyQt5.QtWebEngineWidgets import QWebEnginePage
class Client(QWebEnginePage):
    """Load a URL in a QWebEnginePage and expose its HTML synchronously.

    Constructing a ``Client`` blocks until the page's ``loadFinished``
    signal fires. ``get_html()`` then returns the HTML as a plain return
    value, even though ``toHtml()`` itself only delivers it to a callback.
    """

    # Emitted by store_html() once the toHtml() callback has run.
    toHtmlFinished = pyqtSignal()

    def __init__(self, url):
        """Start the Qt application if needed and block until *url* loads.

        Args:
            url: Address of the page to load, as a string.
        """
        # Only one QApplication may exist per process, so reuse a running
        # one instead of unconditionally creating another.
        self.app = QApplication.instance() or QApplication(sys.argv)
        super().__init__()
        # Defined up front so the attribute exists before store_html() runs.
        self.html = ""
        self.loadFinished.connect(self.on_page_load)
        self.load(QUrl(url))
        # Run the event loop until on_page_load() stops it.
        self.app.exec_()

    def on_page_load(self):
        """Stop the application event loop once loading has finished."""
        self.app.quit()

    def store_html(self, html):
        """Callback for ``toHtml()``: keep *html* and signal completion.

        Args:
            html: The page's HTML as delivered by ``toHtml()``.
        """
        self.html = html
        self.toHtmlFinished.emit()

    def get_html(self):
        """Return the page's HTML, waiting until ``toHtml()`` delivers it.

        Returns:
            The HTML most recently passed to ``store_html()``.
        """
        loop = QEventLoop()
        # Connect before requesting the HTML so the completion signal can
        # never be emitted while nothing is listening for it.
        self.toHtmlFinished.connect(loop.quit)
        self.toHtml(self.store_html)
        # Spin a local event loop until store_html() emits toHtmlFinished.
        loop.exec_()
        return self.html
# Load the page through Client, then print the HTML it returns.
url = 'https://pythonprogramming.net/parsememcparseface/'
client_response = Client(url)
source = client_response.get_html()
print(source)
参考文献:
我正在尝试学习使用 PyQt5 进行动态网页抓取。我参考的教程是针对 PyQt4 编写的,而 PyQt5 中有些库已经发生了变化。
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEnginePage
import bs4 as bs
import urllib.request
# Asker's original (failing) page wrapper, kept exactly as posted.
# NOTE(review): indentation was lost in this listing; the two defs below are
# presumably methods of Client.
class Client(QWebEnginePage):
# NOTE(review): `_init_` has single underscores, so Python does not treat it
# as the constructor; Client(url) then hands `url` to QWebEnginePage's own
# constructor, which matches the TypeError reported for this code.
def _init_(self, url):
self.app=QApplication(sys.argv)
# NOTE(review): QWebPage is not among the names imported by this snippet.
QWebPage._init_(self)
self.loadFinished.connect(self.on_page_load)
# NOTE(review): mainFrame() comes from the PyQt4 tutorial; confirm it exists
# on QWebEnginePage before relying on it.
self.mainFrame().load(QUrl(url))
self.app.exec_()
# Connected to loadFinished above; quits the QApplication.
def on_page_load(self):
self.app.quit()
# Script section: load the page through Client, then parse it with BeautifulSoup.
url='https://pythonprogramming.net/parsememcparseface/'
client_response=Client(url)
source=client_response.mainFrame().toHtml()
#sauce=urllib.request.urlopen('https://pythonprogramming.net/sitemap.xml').read()
# NOTE(review): `sauce` is only assigned in the commented-out line above, while
# the HTML fetched here is stored in `source`; confirm which one was meant.
soup=bs.BeautifulSoup(sauce,'xml')
# Look up the <p class="jstest"> element and print its text.
js_test=soup.find('p',class_='jstest')
print(js_test.text)
显示以下错误:
Traceback (most recent call last):
File "jsp.py", line 19, in <module>
client_response=Client(url)
TypeError: arguments did not match any overloaded call:
QWebEnginePage(parent: QObject = None): argument 1 has unexpected type 'str'
QWebEnginePage(QWebEngineProfile, parent: QObject = None): argument 1 has unexpected type 'str'
有人帮帮我!
您的代码有几个错误:
- `__init__` 前后必须各有 2 个下划线。
- `QWebEnginePage` 没有 `mainFrame()` 方法,现在必须直接在页面对象上调用 `load()`。
- 另一个变化是 `toHtml()` 不再是同步函数:它需要传入一个回调来接收 HTML。不过经过我的修改,`get_html()` 重新提供了同步的调用方式。
代码:
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl, pyqtSignal, QEventLoop
from PyQt5.QtWebEngineWidgets import QWebEnginePage
class Client(QWebEnginePage):
    """Load a URL in a QWebEnginePage and expose its HTML synchronously.

    Constructing a ``Client`` blocks until the page's ``loadFinished``
    signal fires. ``get_html()`` then returns the HTML as a plain return
    value, even though ``toHtml()`` itself only delivers it to a callback.
    """

    # Emitted by store_html() once the toHtml() callback has run.
    toHtmlFinished = pyqtSignal()

    def __init__(self, url):
        """Start the Qt application if needed and block until *url* loads.

        Args:
            url: Address of the page to load, as a string.
        """
        # Only one QApplication may exist per process, so reuse a running
        # one instead of unconditionally creating another.
        self.app = QApplication.instance() or QApplication(sys.argv)
        super().__init__()
        # Defined up front so the attribute exists before store_html() runs.
        self.html = ""
        self.loadFinished.connect(self.on_page_load)
        self.load(QUrl(url))
        # Run the event loop until on_page_load() stops it.
        self.app.exec_()

    def on_page_load(self):
        """Stop the application event loop once loading has finished."""
        self.app.quit()

    def store_html(self, html):
        """Callback for ``toHtml()``: keep *html* and signal completion.

        Args:
            html: The page's HTML as delivered by ``toHtml()``.
        """
        self.html = html
        self.toHtmlFinished.emit()

    def get_html(self):
        """Return the page's HTML, waiting until ``toHtml()`` delivers it.

        Returns:
            The HTML most recently passed to ``store_html()``.
        """
        loop = QEventLoop()
        # Connect before requesting the HTML so the completion signal can
        # never be emitted while nothing is listening for it.
        self.toHtmlFinished.connect(loop.quit)
        self.toHtml(self.store_html)
        # Spin a local event loop until store_html() emits toHtmlFinished.
        loop.exec_()
        return self.html
# Load the page through Client, then print the HTML it returns.
url = 'https://pythonprogramming.net/parsememcparseface/'
client_response = Client(url)
source = client_response.get_html()
print(source)
参考文献: