使用VBA点击html按钮,然后抓取刷新的数据
Use VBA to click html button and then scrape refreshed data
我正在尝试编写一个将日期输入到输入框中的程序
<input name="Mdate" type="text" id="Mdate" size="30" value="" /></td>
点击提交按钮
<input type="submit" name="button" id="button" value="Submit" />
然后抓取出现在 "a" 标签中的结果数据。
<center>
<b>Tuesday, 6 January 2015</b><br />
<a href="/horse-racing-results/Ruakaka/2015-1-6" target="_blank">Ruakaka</a>
在输入提交按钮之前,此数据不可用。我的尝试在下面完整发布。我似乎遇到的问题是我无法访问修改后的 html 代码(通过单击提交修改)。谁能提供一些建议?
'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim inputs As MSHTML.IHTMLElementCollection 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection 'Element collection for th tags
Dim element As MSHTML.IHTMLElement 'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement 'Header elements
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False 'make IE invisible
'Navigate to webpage
Dim ieURL As String: ieURL = "http://www.racenet.com.au/horse-racing-results/" 'set URL from which to retrieve racemeet and date data
ie.navigate ieURL 'navigate to URL
Do While ie.Busy Or ie.readyState <> 4 'wait for page to load
DoEvents
Loop
Set htmldoc = ie.document 'Document webpage
Set inputs = htmldoc.getElementsByTagName("input") 'Find all input tags
Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)
For i = startdate To enddate
dd = Day(i): mm = Month(i): yyyy = Year(i)
For Each element In inputs
If element.Name = "Mdate" Then
element.Value = yyyy & "-" & mm & "-" & dd
Else
If element.Name = "button" Then
element.Click
'insert scraper
Set eles1 = htmldoc.getElementsByTagName("a") 'Find all centre tags
For Each ele1 In eles1
If InStr(ele1.href, "/horse-racing-results/") > 0 Then
raceMeet = ele1.innerText
raceURL = ele1.innerHTML
ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
End If
Next ele1
Else
End If
End If
Next element
Stop
Next i
ie.Quit
插入页面加载时等待的条件。
以下重写成功地从我的电脑上的目标页面获取数据:
Private Sub CommandButton1_Click()
'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim inputs As MSHTML.IHTMLElementCollection 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection 'Element collection for th tags
Dim element As MSHTML.IHTMLElement 'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement 'Header elements
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True 'make IE invisible
'Navigate to webpage
Dim ieURL As String: ieURL = "http://www.racenet.com.au/horse-racing-results/" 'set URL from which to retrieve racemeet and date data
ie.navigate ieURL 'navigate to URL
Do While ie.Busy Or ie.readyState <> 4 'wait for page to load
DoEvents
Loop
Set htmldoc = ie.document 'Document webpage
Set inputs = htmldoc.getElementsByTagName("input") 'Find all input tags
Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)
For i = startdate To enddate
dd = Day(i): mm = Month(i): yyyy = Year(i)
For Each element In inputs
If element.Name = "Mdate" Then
element.Value = yyyy & "-" & mm & "-" & dd
Else
If element.Name = "button" Then
element.Click
Exit For
End If
End If
Next element
Do
' Wait until the Browser is loaded'
Loop Until ie.readyState = READYSTATE_COMPLETE
'insert scraper
Set eles1 = htmldoc.getElementsByTagName("a") 'Find all centre tags
For Each ele1 In eles1
If InStr(ele1.href, "/horse-racing-results/") > 0 Then
raceMeet = ele1.innerText
raceURL = ele1.innerHTML
ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
End If
Next ele1
Stop
Next i
ie.Quit
End Sub
编辑:
分析完HTTP请求后,我把代码精简了一点(可以直接查询结果,不用填表提交页面)
我不太喜欢昂贵的数组 ReDims,所以我创建了一个 class,并将结果保存在 class 的集合中(随意使用或不使用) .
添加一个新的 class 模块,将其命名为 clRaceMeet 并粘贴此代码:
Option Explicit
Private pMeet As String
Private pUrl As String
Public Property Let Meet(ByVal Val As String)
pMeet = Val
End Property
Public Property Get Meet() As String
Meet = pMeet
End Property
Public Property Let URL(ByVal Val As String)
pUrl = Val
End Property
Public Property Get URL() As String
URL = pUrl
End Property
然后,使用这个修改后的代码版本来抓取数据并将其转储到调试window:
Option Explicit
Private Sub CommandButton1_Click()
'dimension variables
Dim ie As InternetExplorer
Dim ieURL As String
Dim dd As Integer
Dim mm As Integer
Dim yyyy As Integer
Dim startDate As Date
Dim endDate As Date
Dim i As Long
Dim htmlDoc As MSHTML.IHTMLDocument
Dim colLeftEleColl As MSHTML.IHTMLElementCollection
Dim colLeftEle As MSHTML.IHTMLElement
Dim centerEleColl As MSHTML.IHTMLElementCollection
Dim centerEle As MSHTML.IHTMLElement
Dim raceMeet As String
Dim raceURL As String
Dim objRaceMeet As clRaceMeet
Dim raceMeetColl As New Collection
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
startDate = #1/1/2009#
endDate = Date - 1
For i = startDate To endDate
dd = Day(i)
mm = Month(i)
yyyy = Year(i)
ieURL = "http://www.racenet.com.au/horse-racing-results-search.asp?Mdate=" & yyyy & "-" & mm & "-" & dd
ie.navigate ieURL
Do
' Wait until the Browser is loaded'
Loop Until ie.readyState = READYSTATE_COMPLETE
Set htmlDoc = ie.document
'insert scraper
Set colLeftEleColl = htmlDoc.getElementById("ColLeft").all
'Loop through elements of ColLeft div
For Each colLeftEle In colLeftEleColl
If colLeftEle.tagName = "CENTER" Then
Set centerEleColl = colLeftEle.all
'Loop through elements of <center> tag
For Each centerEle In centerEleColl
If centerEle.tagName = "A" Then
If InStr(centerEle.href, "/horse-racing-results/") > 0 Then
raceMeet = centerEle.innerText
raceURL = centerEle.href
Set objRaceMeet = New clRaceMeet
objRaceMeet.Meet = raceMeet
objRaceMeet.URL = raceURL
raceMeetColl.Add objRaceMeet
End If
End If
Next centerEle
Exit For
End If
Next colLeftEle
' Dump results to immediate window:
For Each objRaceMeet In raceMeetColl
Debug.Print objRaceMeet.Meet & " - " & objRaceMeet.URL
Next objRaceMeet
'Stop
Next i
ie.Quit
End Sub
祝您下注愉快! :)
我玩弄了最后一个,for next 循环中的 for each 循环必须紧随其后。然后我也将它列在 sheet1 中并且它起作用了。我做了一些小的调整,例如添加一个变量来增加单元格。
这段代码并没有产生网站的实际结果,不确定这是否是您的目标。
我正在尝试编写一个将日期输入到输入框中的程序
<input name="Mdate" type="text" id="Mdate" size="30" value="" /></td>
点击提交按钮
<input type="submit" name="button" id="button" value="Submit" />
然后抓取出现在 "a" 标签中的结果数据。
<center>
<b>Tuesday, 6 January 2015</b><br />
<a href="/horse-racing-results/Ruakaka/2015-1-6" target="_blank">Ruakaka</a>
在输入提交按钮之前,此数据不可用。我的尝试在下面完整发布。我似乎遇到的问题是我无法访问修改后的 html 代码(通过单击提交修改)。谁能提供一些建议?
'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim inputs As MSHTML.IHTMLElementCollection 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection 'Element collection for th tags
Dim element As MSHTML.IHTMLElement 'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement 'Header elements
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False 'make IE invisible
'Navigate to webpage
Dim ieURL As String: ieURL = "http://www.racenet.com.au/horse-racing-results/" 'set URL from which to retrieve racemeet and date data
ie.navigate ieURL 'navigate to URL
Do While ie.Busy Or ie.readyState <> 4 'wait for page to load
DoEvents
Loop
Set htmldoc = ie.document 'Document webpage
Set inputs = htmldoc.getElementsByTagName("input") 'Find all input tags
Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)
For i = startdate To enddate
dd = Day(i): mm = Month(i): yyyy = Year(i)
For Each element In inputs
If element.Name = "Mdate" Then
element.Value = yyyy & "-" & mm & "-" & dd
Else
If element.Name = "button" Then
element.Click
'insert scraper
Set eles1 = htmldoc.getElementsByTagName("a") 'Find all centre tags
For Each ele1 In eles1
If InStr(ele1.href, "/horse-racing-results/") > 0 Then
raceMeet = ele1.innerText
raceURL = ele1.innerHTML
ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
End If
Next ele1
Else
End If
End If
Next element
Stop
Next i
ie.Quit
插入页面加载时等待的条件。
以下重写成功地从我的电脑上的目标页面获取数据:
Private Sub CommandButton1_Click()
'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim inputs As MSHTML.IHTMLElementCollection 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection 'Element collection for th tags
Dim element As MSHTML.IHTMLElement 'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement 'Header elements
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True 'make IE invisible
'Navigate to webpage
Dim ieURL As String: ieURL = "http://www.racenet.com.au/horse-racing-results/" 'set URL from which to retrieve racemeet and date data
ie.navigate ieURL 'navigate to URL
Do While ie.Busy Or ie.readyState <> 4 'wait for page to load
DoEvents
Loop
Set htmldoc = ie.document 'Document webpage
Set inputs = htmldoc.getElementsByTagName("input") 'Find all input tags
Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)
For i = startdate To enddate
dd = Day(i): mm = Month(i): yyyy = Year(i)
For Each element In inputs
If element.Name = "Mdate" Then
element.Value = yyyy & "-" & mm & "-" & dd
Else
If element.Name = "button" Then
element.Click
Exit For
End If
End If
Next element
Do
' Wait until the Browser is loaded'
Loop Until ie.readyState = READYSTATE_COMPLETE
'insert scraper
Set eles1 = htmldoc.getElementsByTagName("a") 'Find all centre tags
For Each ele1 In eles1
If InStr(ele1.href, "/horse-racing-results/") > 0 Then
raceMeet = ele1.innerText
raceURL = ele1.innerHTML
ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
End If
Next ele1
Stop
Next i
ie.Quit
End Sub
编辑:
分析完HTTP请求后,我把代码精简了一点(可以直接查询结果,不用填表提交页面)
我不太喜欢昂贵的数组 ReDims,所以我创建了一个 class,并将结果保存在 class 的集合中(随意使用或不使用) .
添加一个新的 class 模块,将其命名为 clRaceMeet 并粘贴此代码:
Option Explicit
Private pMeet As String
Private pUrl As String
Public Property Let Meet(ByVal Val As String)
pMeet = Val
End Property
Public Property Get Meet() As String
Meet = pMeet
End Property
Public Property Let URL(ByVal Val As String)
pUrl = Val
End Property
Public Property Get URL() As String
URL = pUrl
End Property
然后,使用这个修改后的代码版本来抓取数据并将其转储到调试window:
Option Explicit
Private Sub CommandButton1_Click()
'dimension variables
Dim ie As InternetExplorer
Dim ieURL As String
Dim dd As Integer
Dim mm As Integer
Dim yyyy As Integer
Dim startDate As Date
Dim endDate As Date
Dim i As Long
Dim htmlDoc As MSHTML.IHTMLDocument
Dim colLeftEleColl As MSHTML.IHTMLElementCollection
Dim colLeftEle As MSHTML.IHTMLElement
Dim centerEleColl As MSHTML.IHTMLElementCollection
Dim centerEle As MSHTML.IHTMLElement
Dim raceMeet As String
Dim raceURL As String
Dim objRaceMeet As clRaceMeet
Dim raceMeetColl As New Collection
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
startDate = #1/1/2009#
endDate = Date - 1
For i = startDate To endDate
dd = Day(i)
mm = Month(i)
yyyy = Year(i)
ieURL = "http://www.racenet.com.au/horse-racing-results-search.asp?Mdate=" & yyyy & "-" & mm & "-" & dd
ie.navigate ieURL
Do
' Wait until the Browser is loaded'
Loop Until ie.readyState = READYSTATE_COMPLETE
Set htmlDoc = ie.document
'insert scraper
Set colLeftEleColl = htmlDoc.getElementById("ColLeft").all
'Loop through elements of ColLeft div
For Each colLeftEle In colLeftEleColl
If colLeftEle.tagName = "CENTER" Then
Set centerEleColl = colLeftEle.all
'Loop through elements of <center> tag
For Each centerEle In centerEleColl
If centerEle.tagName = "A" Then
If InStr(centerEle.href, "/horse-racing-results/") > 0 Then
raceMeet = centerEle.innerText
raceURL = centerEle.href
Set objRaceMeet = New clRaceMeet
objRaceMeet.Meet = raceMeet
objRaceMeet.URL = raceURL
raceMeetColl.Add objRaceMeet
End If
End If
Next centerEle
Exit For
End If
Next colLeftEle
' Dump results to immediate window:
For Each objRaceMeet In raceMeetColl
Debug.Print objRaceMeet.Meet & " - " & objRaceMeet.URL
Next objRaceMeet
'Stop
Next i
ie.Quit
End Sub
祝您下注愉快! :)
我玩弄了最后一个,for next 循环中的 for each 循环必须紧随其后。然后我也将它列在 sheet1 中并且它起作用了。我做了一些小的调整,例如添加一个变量来增加单元格。
这段代码并没有产生网站的实际结果,不确定这是否是您的目标。