VBA 网页抓取脚本返回“下标超出范围”错误
VBA Web Scraping Script Returns Subscript Out of Range
问题:
我一直试图从网站上抓取数据,但它总是报“下标超出范围”错误,我不知道为什么。我在另一个网站上使用了完全相同的代码,运行得很好。
是的,我已经根据要抓取的新网站相应地修改了 div。
代码:
Option Explicit
' Fetches the MediaMarkt search-results page over XMLHTTP, parses it into an
' HTMLDocument and writes the text of every <div> found inside each
' "snize-four-columns" element to worksheet "Loiça": one matched element per
' row, one <div> per column.
' Requires a reference to the Microsoft HTML Object Library (for HTMLDocument).
Public Sub Loiça()
Dim data As Object, i As Long, html As HTMLDocument, r As Long, c As Long, item As Object, div As Object
Set html = New HTMLDocument '<== VBE > Tools > References > Microsoft HTML Object Library
Const START_URL As String = "https://mediamarkt.pt/pages/search-results-page?q=maquina+roupa&page=1"
' Late-bound request object; the same instance is reused for every page fetch below.
With CreateObject("MSXML2.XMLHTTP")
' Synchronous GET (third argument False) of the first results page.
.Open "GET", START_URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
' Load the raw response text into the document for DOM queries.
' NOTE(review): XMLHTTP returns only the served markup; content generated by
' page JavaScript is presumably absent here - confirm by inspecting .responseText.
html.body.innerHTML = .responseText
Dim numPages As Long, numResults As Long, arr() As String
' Split the results-header text on spaces (Chr$(32)).
arr = Split(html.querySelector(".snize-search-results-header").innerText, Chr$(32))
' Takes the last token as the result count. When the header text is empty,
' Split returns an empty array (UBound = -1), so this line raises
' "Subscript out of range".
numResults = arr(UBound(arr))
' The page count is hard-coded to 1, so the loop below only processes the first
' page; numResults is not used after this point.
numPages = 1
For i = 1 To numPages
' Pages after the first are fetched by rewriting the page= query parameter.
If i > 1 Then
.Open "GET", Replace$("https://mediamarkt.pt/pages/search-results-page?q=maquina+roupa&page=1", "page=1", "page=" & i), False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End If
Set data = html.getElementsByClassName("snize-four-columns")
For Each item In data
' r is never reset, so rows keep accumulating across pages; c restarts at
' column 1 for every matched element.
r = r + 1: c = 1
For Each div In item.getElementsByTagName("div")
With ThisWorkbook.Worksheets("Loiça")
.Cells(r, c) = div.innerText
End With
c = c + 1
Next
Next
Next
End With
'----------------------------------------------------------------------------------------------------------------------------------------------------------------------'
End Sub
这一句:
html.querySelector(".snize-search-results-header").innerText
返回的是空字符串,因此 Split 之后得到的是一个空数组,UBound(arr) 为 -1,于是 arr(UBound(arr)) 就会触发“下标超出范围”。
这个值可能需要页面上的 JavaScript 运行之后才会生成。请检查返回的 HTML——我认为其中并没有这个值。可以改用 Selenium 或 IE 之类的方式,让 JS 在页面上运行之后再读取该值。
更新:
在这种情况下,您还需要改用 LBound,因此可以使用一个通过 IE 获取并返回该值的函数:
numPages = GetNumberOfPages
' Opens the search-results page in a hidden Internet Explorer instance (so the
' page's scripts get a chance to run) and returns the first space-delimited
' token of the ".snize-search-results-header" text, converted to a Long.
'
' Returns 0 when the header element is missing, its text is empty, or the first
' token is not numeric. Any other runtime error is re-raised to the caller, but
' only after the hidden browser has been closed.
'
' NOTE(review): the value read is the leading number of the header text, which
' looks like a result count rather than a page count - confirm before using it
' as a loop bound.
' Requires a reference to Microsoft Internet Controls (for InternetExplorer).
Public Function GetNumberOfPages() As Long
    Const SEARCH_URL As String = "https://mediamarkt.pt/pages/search-results-page?q=maquina+roupa&page=1"
    Const HEADER_SELECTOR As String = ".snize-search-results-header"

    Dim ie As InternetExplorer
    Dim header As Object
    Dim tokens() As String
    Dim errNum As Long, errSrc As String, errDesc As String

    ' Explicit instantiation instead of auto-instancing "Dim ... As New".
    Set ie = New InternetExplorer
    On Error GoTo Fail

    With ie
        .Visible = False
        .Navigate2 SEARCH_URL
        ' Wait until the browser reports the document as fully loaded.
        While .Busy Or .readyState < 4: DoEvents: Wend

        Set header = .document.querySelector(HEADER_SELECTOR)
        If Not header Is Nothing Then
            ' Trim first so leading whitespace cannot produce an empty first token.
            tokens = Split(Trim$(header.innerText), Chr$(32))
            ' Split returns an empty array (UBound = -1) for a zero-length string;
            ' indexing it would raise "Subscript out of range".
            If UBound(tokens) >= LBound(tokens) Then
                If IsNumeric(tokens(LBound(tokens))) Then
                    GetNumberOfPages = CLng(tokens(LBound(tokens)))
                End If
            End If
        End If
    End With

Done:
    ' Always close the hidden browser, even after a failure, so no invisible
    ' IE process is left running.
    On Error Resume Next
    ie.Quit
    On Error GoTo 0
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Function

Fail:
    ' Remember the error, then leave the handler via Resume so that the cleanup
    ' code at Done runs with normal error handling re-enabled.
    errNum = Err.Number: errSrc = Err.Source: errDesc = Err.Description
    Resume Done
End Function
对于接下来的部分,您要查找的应该是另一个 class 名称(我认为):
Set data = html.getElementsByClassName("snize-product")
检查 html 进行验证。
问题:
我一直试图从网站上抓取数据,但它总是报“下标超出范围”错误,我不知道为什么。我在另一个网站上使用了完全相同的代码,运行得很好。
是的,我已经根据要抓取的新网站相应地修改了 div。
代码:
Option Explicit
' Fetches the MediaMarkt search-results page over XMLHTTP, parses it into an
' HTMLDocument and writes the text of every <div> found inside each
' "snize-four-columns" element to worksheet "Loiça": one matched element per
' row, one <div> per column.
' Requires a reference to the Microsoft HTML Object Library (for HTMLDocument).
Public Sub Loiça()
Dim data As Object, i As Long, html As HTMLDocument, r As Long, c As Long, item As Object, div As Object
Set html = New HTMLDocument '<== VBE > Tools > References > Microsoft HTML Object Library
Const START_URL As String = "https://mediamarkt.pt/pages/search-results-page?q=maquina+roupa&page=1"
' Late-bound request object; the same instance is reused for every page fetch below.
With CreateObject("MSXML2.XMLHTTP")
' Synchronous GET (third argument False) of the first results page.
.Open "GET", START_URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
' Load the raw response text into the document for DOM queries.
' NOTE(review): XMLHTTP returns only the served markup; content generated by
' page JavaScript is presumably absent here - confirm by inspecting .responseText.
html.body.innerHTML = .responseText
Dim numPages As Long, numResults As Long, arr() As String
' Split the results-header text on spaces (Chr$(32)).
arr = Split(html.querySelector(".snize-search-results-header").innerText, Chr$(32))
' Takes the last token as the result count. When the header text is empty,
' Split returns an empty array (UBound = -1), so this line raises
' "Subscript out of range".
numResults = arr(UBound(arr))
' The page count is hard-coded to 1, so the loop below only processes the first
' page; numResults is not used after this point.
numPages = 1
For i = 1 To numPages
' Pages after the first are fetched by rewriting the page= query parameter.
If i > 1 Then
.Open "GET", Replace$("https://mediamarkt.pt/pages/search-results-page?q=maquina+roupa&page=1", "page=1", "page=" & i), False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End If
Set data = html.getElementsByClassName("snize-four-columns")
For Each item In data
' r is never reset, so rows keep accumulating across pages; c restarts at
' column 1 for every matched element.
r = r + 1: c = 1
For Each div In item.getElementsByTagName("div")
With ThisWorkbook.Worksheets("Loiça")
.Cells(r, c) = div.innerText
End With
c = c + 1
Next
Next
Next
End With
'----------------------------------------------------------------------------------------------------------------------------------------------------------------------'
End Sub
这一句:
html.querySelector(".snize-search-results-header").innerText
返回的是空字符串,因此 Split 之后得到的是一个空数组,UBound(arr) 为 -1,于是 arr(UBound(arr)) 就会触发“下标超出范围”。
这个值可能需要页面上的 JavaScript 运行之后才会生成。请检查返回的 HTML——我认为其中并没有这个值。可以改用 Selenium 或 IE 之类的方式,让 JS 在页面上运行之后再读取该值。
更新:
在这种情况下,您还需要改用 LBound,因此可以使用一个通过 IE 获取并返回该值的函数:
numPages = GetNumberOfPages
' Opens the search-results page in a hidden Internet Explorer instance (so the
' page's scripts get a chance to run) and returns the first space-delimited
' token of the ".snize-search-results-header" text, converted to a Long.
'
' Returns 0 when the header element is missing, its text is empty, or the first
' token is not numeric. Any other runtime error is re-raised to the caller, but
' only after the hidden browser has been closed.
'
' NOTE(review): the value read is the leading number of the header text, which
' looks like a result count rather than a page count - confirm before using it
' as a loop bound.
' Requires a reference to Microsoft Internet Controls (for InternetExplorer).
Public Function GetNumberOfPages() As Long
    Const SEARCH_URL As String = "https://mediamarkt.pt/pages/search-results-page?q=maquina+roupa&page=1"
    Const HEADER_SELECTOR As String = ".snize-search-results-header"

    Dim ie As InternetExplorer
    Dim header As Object
    Dim tokens() As String
    Dim errNum As Long, errSrc As String, errDesc As String

    ' Explicit instantiation instead of auto-instancing "Dim ... As New".
    Set ie = New InternetExplorer
    On Error GoTo Fail

    With ie
        .Visible = False
        .Navigate2 SEARCH_URL
        ' Wait until the browser reports the document as fully loaded.
        While .Busy Or .readyState < 4: DoEvents: Wend

        Set header = .document.querySelector(HEADER_SELECTOR)
        If Not header Is Nothing Then
            ' Trim first so leading whitespace cannot produce an empty first token.
            tokens = Split(Trim$(header.innerText), Chr$(32))
            ' Split returns an empty array (UBound = -1) for a zero-length string;
            ' indexing it would raise "Subscript out of range".
            If UBound(tokens) >= LBound(tokens) Then
                If IsNumeric(tokens(LBound(tokens))) Then
                    GetNumberOfPages = CLng(tokens(LBound(tokens)))
                End If
            End If
        End If
    End With

Done:
    ' Always close the hidden browser, even after a failure, so no invisible
    ' IE process is left running.
    On Error Resume Next
    ie.Quit
    On Error GoTo 0
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Function

Fail:
    ' Remember the error, then leave the handler via Resume so that the cleanup
    ' code at Done runs with normal error handling re-enabled.
    errNum = Err.Number: errSrc = Err.Source: errDesc = Err.Description
    Resume Done
End Function
对于接下来的部分,您要查找的应该是另一个 class 名称(我认为):
Set data = html.getElementsByClassName("snize-product")
检查 html 进行验证。