从网络获取超链接
Get hyperlinks from web
我正在尝试使用 hyperlinks 从网络上获取数据。我从网上复制了数据并粘贴到 excel。整个数据已粘贴在单个单元格中,当我将数据与文本分离到列时,没有携带 hyperlink。
来源link:https://www.sec.gov/cgi-bin/current?q1=3&q2=6&q3=
我还尝试使用 "From Web" 选项将数据转储到 Excel 中。不幸的是,没有携带 hyperlink。能帮忙提点建议吗?
谢谢
宏只抓取来自 table(不是 table)的所有链接(第二和第三列)。这需要一点时间。等到IE关闭。请阅读代码中的注释:
Sub LinkList()
Dim url As String
Dim browser As Object
Dim nodeContainer As Object
Dim nodeAllLinks As Object
Dim nodeOneLink As Object
Dim currentRow As Long
Dim controlCounter As Long
ActiveSheet.Columns("B:B").NumberFormat = "@"
ActiveSheet.Columns("D:D").NumberFormat = "@"
currentRow = 2
url = "https://www.sec.gov/cgi-bin/current?q1=3&q2=6&q3="
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set browser = CreateObject("internetexplorer.application")
browser.Visible = True 'You can set this to False to make the IE invisible
browser.navigate url
Do Until browser.ReadyState = 4: DoEvents: Loop
'Get the container with all links inside
Set nodeContainer = browser.document.getElementsByTagName("pre")(0)
'Get all links in a node collection
Set nodeAllLinks = nodeContainer.getElementsByTagName("a")
'Get each link
For Each nodeOneLink In nodeAllLinks
'Every second link should be in the same row than the first link of a HTML table row
If controlCounter Mod 2 = 0 Then
With ActiveSheet
'Set link as link
.Hyperlinks.Add Anchor:=.Cells(currentRow, 1), Address:=nodeOneLink.href, TextToDisplay:=nodeOneLink.href
'Write the text of the link from the page to the column afte the link in Excel
.Cells(currentRow, 2).Value = nodeOneLink.innertext
End With
Else
With ActiveSheet
.Hyperlinks.Add Anchor:=.Cells(currentRow, 3), Address:=nodeOneLink.href, TextToDisplay:=nodeOneLink.href
.Cells(currentRow, 4).Value = nodeOneLink.innertext
End With
currentRow = currentRow + 1
End If
'Increment the control variable to devide between first and second link
controlCounter = controlCounter + 1
Next nodeOneLink
'Clean up
browser.Quit
Set browser = Nothing
Set nodeContainer = Nothing
Set nodeAllLinks = Nothing
Set nodeOneLink = Nothing
ActiveSheet.Columns("A:D").EntireColumn.AutoFit
End Sub
我正在尝试使用 hyperlinks 从网络上获取数据。我从网上复制了数据并粘贴到 excel。整个数据已粘贴在单个单元格中,当我将数据与文本分离到列时,没有携带 hyperlink。
来源link:https://www.sec.gov/cgi-bin/current?q1=3&q2=6&q3=
我还尝试使用 "From Web" 选项将数据转储到 Excel 中。不幸的是,没有携带 hyperlink。能帮忙提点建议吗?
谢谢
宏只抓取来自 table(不是 table)的所有链接(第二和第三列)。这需要一点时间。等到IE关闭。请阅读代码中的注释:
Sub LinkList()
Dim url As String
Dim browser As Object
Dim nodeContainer As Object
Dim nodeAllLinks As Object
Dim nodeOneLink As Object
Dim currentRow As Long
Dim controlCounter As Long
ActiveSheet.Columns("B:B").NumberFormat = "@"
ActiveSheet.Columns("D:D").NumberFormat = "@"
currentRow = 2
url = "https://www.sec.gov/cgi-bin/current?q1=3&q2=6&q3="
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set browser = CreateObject("internetexplorer.application")
browser.Visible = True 'You can set this to False to make the IE invisible
browser.navigate url
Do Until browser.ReadyState = 4: DoEvents: Loop
'Get the container with all links inside
Set nodeContainer = browser.document.getElementsByTagName("pre")(0)
'Get all links in a node collection
Set nodeAllLinks = nodeContainer.getElementsByTagName("a")
'Get each link
For Each nodeOneLink In nodeAllLinks
'Every second link should be in the same row than the first link of a HTML table row
If controlCounter Mod 2 = 0 Then
With ActiveSheet
'Set link as link
.Hyperlinks.Add Anchor:=.Cells(currentRow, 1), Address:=nodeOneLink.href, TextToDisplay:=nodeOneLink.href
'Write the text of the link from the page to the column afte the link in Excel
.Cells(currentRow, 2).Value = nodeOneLink.innertext
End With
Else
With ActiveSheet
.Hyperlinks.Add Anchor:=.Cells(currentRow, 3), Address:=nodeOneLink.href, TextToDisplay:=nodeOneLink.href
.Cells(currentRow, 4).Value = nodeOneLink.innertext
End With
currentRow = currentRow + 1
End If
'Increment the control variable to devide between first and second link
controlCounter = controlCounter + 1
Next nodeOneLink
'Clean up
browser.Quit
Set browser = Nothing
Set nodeContainer = Nothing
Set nodeAllLinks = Nothing
Set nodeOneLink = Nothing
ActiveSheet.Columns("A:D").EntireColumn.AutoFit
End Sub