Excel VBA - 从网站获取所有 href 链接
Excel VBA - Get all href links from a website
你好,希望有人能帮助我。
以这个链接为例:https://www.academiadasapostas.com/stats/competition/brasil/26
我想获取所有“VS”链接所指向的 href。
我尝试了下面这样的代码:
' Opens the competition page in Internet Explorer and, for every <a> element
' inside the element with id "competition-round-group-0", prints its ID and
' class name followed by the tag names of its child elements to the
' Immediate window.
' Requires references: Microsoft Internet Controls, Microsoft HTML Object Library.
Sub ScrapeScores()
    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As MSHTML.HTMLDocument
    Dim Anchors As MSHTML.IHTMLElementCollection
    Dim Anchor As MSHTML.IHTMLElement
    Dim HTMLDiv As MSHTML.IHTMLElement
    Dim ChildElement As MSHTML.IHTMLElement

    IE.Visible = True
    IE.navigate "https://www.academiadasapostas.com/stats/competition/brasil/26"

    ' Yield to the host while the page loads instead of spinning in a tight loop.
    Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy
        DoEvents
    Loop

    Set HTMLDoc = IE.document
    Set HTMLDiv = HTMLDoc.getElementById("competition-round-group-0")

    ' getElementById yields Nothing when no element has that id; bail out
    ' rather than failing with "Object variable not set" on the next line.
    If HTMLDiv Is Nothing Then
        Debug.Print "Element 'competition-round-group-0' not found"
        Exit Sub
    End If

    Set Anchors = HTMLDiv.getElementsByTagName("a")
    For Each Anchor In Anchors
        Debug.Print Anchor.ID, "&", Anchor.className
        For Each ChildElement In Anchor.Children
            ' Leading comma moves the output to the next print zone.
            Debug.Print , ChildElement.tagName
        Next ChildElement
    Next Anchor
End Sub
但没有成功。我想我可以配合 querySelectorAll 使用 CSS 选择器,对吧?由于 IE 即将被淘汰,最好改用 CSS 选择器。
提前感谢您的回答。
您可以将以下 CSS 选择器与 querySelectorAll 一起使用:.competition-rounds td:nth-child(4) > a
循环遍历返回的 nodeList,并从每个节点中提取 href。
该选择器先选中 table 中的第 4 列,再选中其中的子 a 标签;循环时从这些标签中提取 href 属性。
所需引用(VBE > 工具 > 引用):
- Microsoft Internet Controls
- Microsoft HTML Object Library
Option Explicit
' Loads the competition page in Internet Explorer and prints the href of every
' anchor matched by the CSS selector to the Immediate window, then closes IE.
' Requires references: Microsoft Internet Controls, Microsoft HTML Object Library.
Public Sub PrintLinks()
    Dim ie As SHDocVw.InternetExplorer
    Dim nodeList As MSHTML.IHTMLDOMChildrenCollection
    Dim i As Long
    Dim errNumber As Long, errSource As String, errDescription As String

    Set ie = New SHDocVw.InternetExplorer
    ' From here on, always reach CleanUp so the IE process is not left running.
    On Error GoTo CleanUp

    With ie
        .Visible = True
        .Navigate2 "https://www.academiadasapostas.com/stats/competition/brasil/26"
        While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
        ' Anchors that are direct children of the 4th cell in each row.
        Set nodeList = .Document.querySelectorAll(".competition-rounds td:nth-child(4) > a")
    End With

    For i = 0 To nodeList.Length - 1
        Debug.Print nodeList.Item(i).href
    Next i

CleanUp:
    ' Capture any pending error before calling Quit, then re-raise it so the
    ' caller still sees the original failure.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    ie.Quit
    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription
End Sub
延伸阅读:
如果您希望摆脱 IE,请执行以下操作:
' Downloads the competition page with MSXML2.XMLHTTP (no browser involved),
' parses it into an in-memory HTMLDocument and prints the href of every anchor
' matched by the CSS selector to the Immediate window.
' Requires reference: Microsoft HTML Object Library.
Sub GetTragetLinks()
    Const Url As String = "https://www.academiadasapostas.com/stats/competition/brasil/26"
    Dim Html As HTMLDocument
    Dim Links As Object
    Dim I As Long

    Set Html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", Url, False ' False = synchronous request
        .send
        ' Do not parse an error page as if it were the real content.
        If .Status <> 200 Then
            Debug.Print "Request failed: " & .Status & " " & .statusText
            Exit Sub
        End If
        Html.body.innerHTML = .responseText
    End With

    Set Links = Html.querySelectorAll("tr[id*='gsm_id_'] td[title] > a")
    For I = 0 To Links.Length - 1
        Debug.Print Links.Item(I).href
    Next I
End Sub
需要添加的引用:
Microsoft HTML Object Library
你好,希望有人能帮助我。以这个链接为例:https://www.academiadasapostas.com/stats/competition/brasil/26
我想获取所有“VS”链接所指向的 href。我尝试了下面这样的代码:
' Opens the competition page in Internet Explorer and, for every <a> element
' inside the element with id "competition-round-group-0", prints its ID and
' class name followed by the tag names of its child elements to the
' Immediate window.
' Requires references: Microsoft Internet Controls, Microsoft HTML Object Library.
Sub ScrapeScores()
    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As MSHTML.HTMLDocument
    Dim Anchors As MSHTML.IHTMLElementCollection
    Dim Anchor As MSHTML.IHTMLElement
    Dim HTMLDiv As MSHTML.IHTMLElement
    Dim ChildElement As MSHTML.IHTMLElement

    IE.Visible = True
    IE.navigate "https://www.academiadasapostas.com/stats/competition/brasil/26"

    ' Yield to the host while the page loads instead of spinning in a tight loop.
    Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy
        DoEvents
    Loop

    Set HTMLDoc = IE.document
    Set HTMLDiv = HTMLDoc.getElementById("competition-round-group-0")

    ' getElementById yields Nothing when no element has that id; bail out
    ' rather than failing with "Object variable not set" on the next line.
    If HTMLDiv Is Nothing Then
        Debug.Print "Element 'competition-round-group-0' not found"
        Exit Sub
    End If

    Set Anchors = HTMLDiv.getElementsByTagName("a")
    For Each Anchor In Anchors
        Debug.Print Anchor.ID, "&", Anchor.className
        For Each ChildElement In Anchor.Children
            ' Leading comma moves the output to the next print zone.
            Debug.Print , ChildElement.tagName
        Next ChildElement
    Next Anchor
End Sub
但没有成功。我想我可以配合 querySelectorAll 使用 CSS 选择器,对吧?由于 IE 即将被淘汰,最好改用 CSS 选择器。
提前感谢您的回答。
您可以将以下 CSS 选择器与 querySelectorAll 一起使用:.competition-rounds td:nth-child(4) > a
循环遍历返回的 nodeList,并从每个节点中提取 href。
该选择器先选中 table 中的第 4 列,再选中其中的子 a 标签;循环时从这些标签中提取 href 属性。
所需引用(VBE > 工具 > 引用):
- Microsoft Internet Controls
- Microsoft HTML Object Library
Option Explicit
' Loads the competition page in Internet Explorer and prints the href of every
' anchor matched by the CSS selector to the Immediate window, then closes IE.
' Requires references: Microsoft Internet Controls, Microsoft HTML Object Library.
Public Sub PrintLinks()
    Dim ie As SHDocVw.InternetExplorer
    Dim nodeList As MSHTML.IHTMLDOMChildrenCollection
    Dim i As Long
    Dim errNumber As Long, errSource As String, errDescription As String

    Set ie = New SHDocVw.InternetExplorer
    ' From here on, always reach CleanUp so the IE process is not left running.
    On Error GoTo CleanUp

    With ie
        .Visible = True
        .Navigate2 "https://www.academiadasapostas.com/stats/competition/brasil/26"
        While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
        ' Anchors that are direct children of the 4th cell in each row.
        Set nodeList = .Document.querySelectorAll(".competition-rounds td:nth-child(4) > a")
    End With

    For i = 0 To nodeList.Length - 1
        Debug.Print nodeList.Item(i).href
    Next i

CleanUp:
    ' Capture any pending error before calling Quit, then re-raise it so the
    ' caller still sees the original failure.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    ie.Quit
    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription
End Sub
延伸阅读:
如果您希望摆脱 IE,请执行以下操作:
' Downloads the competition page with MSXML2.XMLHTTP (no browser involved),
' parses it into an in-memory HTMLDocument and prints the href of every anchor
' matched by the CSS selector to the Immediate window.
' Requires reference: Microsoft HTML Object Library.
Sub GetTragetLinks()
    Const Url As String = "https://www.academiadasapostas.com/stats/competition/brasil/26"
    Dim Html As HTMLDocument
    Dim Links As Object
    Dim I As Long

    Set Html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", Url, False ' False = synchronous request
        .send
        ' Do not parse an error page as if it were the real content.
        If .Status <> 200 Then
            Debug.Print "Request failed: " & .Status & " " & .statusText
            Exit Sub
        End If
        Html.body.innerHTML = .responseText
    End With

    Set Links = Html.querySelectorAll("tr[id*='gsm_id_'] td[title] > a")
    For I = 0 To Links.Length - 1
        Debug.Print Links.Item(I).href
    Next I
End Sub
需要添加的引用:
Microsoft HTML Object Library