Excel VBA 从超链接列表中提取 Web 数据
Excel VBA Pulling Web Data From a List of Hyperlinks
我在 sheet 1 的 C 列中有一个 hyperlink 列表,我想从每个 link 中提取数据并为每个 link在已经创建的单独工作sheet中。所有 hyperlink 都指向同一个网站...职业足球参考...但每个 link 都针对不同的 NFL 球员。我想为每个玩家提取相同的数据 table。我已经能够从第一个 link 中提取数据并将其按原样放入 sheet 2 中,但我对 VBA 很陌生,不知道如何创建为我列表中的每个 link 执行此操作并将其放入其他 sheet 的循环。下面是我目前必须从第一个 link:
获取数据的代码
Sub passingStats()
Dim x As Long, y As Long
Dim htm As Object
Set htm = CreateObject("htmlFile")
With CreateObject("msxml2.xmlhttp")
.Open "GET", Range("C2"), False
.send
htm.body.innerhtml = .responsetext
End With
With htm.getelementbyid("passing")
For x = 0 To .Rows.Length - 1
For y = 0 To .Rows(x).Cells.Length - 1
Sheets(2).Cells(x + 4, y + 1).Value = .Rows(x).Cells(y).innertext
Next y
Next x
End With
End Sub
如有任何帮助,我们将不胜感激。
以下使用循环显示。
N.B。
- 您的 table 文章中存在逻辑缺陷,我已经为其编写了补丁。
- 某些字符串在您的脚本中被错误地转换。我已经用
'
作为前缀来阻止它。
代码:
Option Explicit
Public Sub GetInfo()
Dim html As New HTMLDocument, links(), link As Long, wsSourceSheet As Worksheet
Dim hTable As HTMLTable, ws As Worksheet, playerName As String
Set wsSourceSheet = ThisWorkbook.Worksheets("Sheet1") '<change to sheet containing links
Application.ScreenUpdating = False
With wsSourceSheet
links = .Range("C2:C" & .Cells(.Rows.Count, "C").End(xlUp).Row).Value
End With
For link = LBound(links, 1) To UBound(links, 1)
If InStr(links(link, 1), "https://") > 0 Then
Set html = GetHTMLDoc(links(link, 1))
Set hTable = html.getElementById("passing")
If Not hTable Is Nothing Then
playerName = GetNameAbbr(links(link, 1))
Set ws = AddPlayerSheet(playerName)
WriteTableToSheet hTable, ws
FixTable ws
End If
End If
Next
Application.ScreenUpdating = True
End Sub
Public Function GetHTMLDoc(ByVal url As String) As HTMLDocument
Dim sResponse As String, html As New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
html.body.innerHTML = sResponse
Set GetHTMLDoc = html
End Function
Public Sub WriteTableToSheet(ByVal hTable As HTMLTable, ByVal ws As Worksheet)
Dim x As Long, y As Long
With hTable
For x = 0 To .Rows.Length - 1
For y = 0 To .Rows(x).Cells.Length - 1
If y = 6 Or y = 7 Then
ws.Cells(x + 4, y + 1).Value = Chr$(39) & .Rows(x).Cells(y).innerText
Else
ws.Cells(x + 4, y + 1).Value = .Rows(x).Cells(y).innerText
End If
Next y
Next x
End With
End Sub
Public Function GetNameAbbr(ByVal url As String) As String
Dim tempArr() As String
tempArr = Split(url, "/")
GetNameAbbr = Left$(tempArr(UBound(tempArr)), 6)
End Function
Public Function AddPlayerSheet(ByVal playerName As String) As Worksheet
Dim ws As Worksheet
If SheetExists(playerName) Then
Application.DisplayAlerts = False
ThisWorkbook.Worksheets(playerName).Delete
Application.DisplayAlerts = True
End If
Set ws = ThisWorkbook.Worksheets.Add
ws.Name = playerName
Set AddPlayerSheet = ws
End Function
Public Function SheetExists(ByVal playerName As String) As Boolean
SheetExists = Evaluate("ISREF('" & playerName & "'!A1)")
End Function
Public Sub FixTable(ByVal ws As Worksheet)
Dim found As Range, numSummaryRows As Long
With ws
Set found = .Columns("A").Find("Career")
If found Is Nothing Then Exit Sub
numSummaryRows = .Cells(.Rows.Count, "A").End(xlUp).Row - found.Row
numSummaryRows = IIf(numSummaryRows = 0, 1, numSummaryRows + 1)
Debug.Print found.Offset(, 1).Resize(numSummaryRows, 30).Address, ws.Name
found.Offset(, 1).Resize(numSummaryRows, 30).Copy found.Offset(, 2)
found.Offset(, 1).Resize(numSummaryRows, 1).ClearContents
End With
End Sub
测试工作表 1 中的链接:
示例网页:
对应的代码写出来:
我在 sheet 1 的 C 列中有一个 hyperlink 列表,我想从每个 link 中提取数据并为每个 link在已经创建的单独工作sheet中。所有 hyperlink 都指向同一个网站...职业足球参考...但每个 link 都针对不同的 NFL 球员。我想为每个玩家提取相同的数据 table。我已经能够从第一个 link 中提取数据并将其按原样放入 sheet 2 中,但我对 VBA 很陌生,不知道如何创建为我列表中的每个 link 执行此操作并将其放入其他 sheet 的循环。下面是我目前必须从第一个 link:
获取数据的代码Sub passingStats()
Dim x As Long, y As Long
Dim htm As Object
Set htm = CreateObject("htmlFile")
With CreateObject("msxml2.xmlhttp")
.Open "GET", Range("C2"), False
.send
htm.body.innerhtml = .responsetext
End With
With htm.getelementbyid("passing")
For x = 0 To .Rows.Length - 1
For y = 0 To .Rows(x).Cells.Length - 1
Sheets(2).Cells(x + 4, y + 1).Value = .Rows(x).Cells(y).innertext
Next y
Next x
End With
End Sub
如有任何帮助,我们将不胜感激。
以下使用循环显示。
N.B。
- 您的 table 文章中存在逻辑缺陷,我已经为其编写了补丁。
- 某些字符串在您的脚本中被错误地转换。我已经用
'
作为前缀来阻止它。
代码:
Option Explicit
Public Sub GetInfo()
Dim html As New HTMLDocument, links(), link As Long, wsSourceSheet As Worksheet
Dim hTable As HTMLTable, ws As Worksheet, playerName As String
Set wsSourceSheet = ThisWorkbook.Worksheets("Sheet1") '<change to sheet containing links
Application.ScreenUpdating = False
With wsSourceSheet
links = .Range("C2:C" & .Cells(.Rows.Count, "C").End(xlUp).Row).Value
End With
For link = LBound(links, 1) To UBound(links, 1)
If InStr(links(link, 1), "https://") > 0 Then
Set html = GetHTMLDoc(links(link, 1))
Set hTable = html.getElementById("passing")
If Not hTable Is Nothing Then
playerName = GetNameAbbr(links(link, 1))
Set ws = AddPlayerSheet(playerName)
WriteTableToSheet hTable, ws
FixTable ws
End If
End If
Next
Application.ScreenUpdating = True
End Sub
Public Function GetHTMLDoc(ByVal url As String) As HTMLDocument
Dim sResponse As String, html As New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
html.body.innerHTML = sResponse
Set GetHTMLDoc = html
End Function
Public Sub WriteTableToSheet(ByVal hTable As HTMLTable, ByVal ws As Worksheet)
Dim x As Long, y As Long
With hTable
For x = 0 To .Rows.Length - 1
For y = 0 To .Rows(x).Cells.Length - 1
If y = 6 Or y = 7 Then
ws.Cells(x + 4, y + 1).Value = Chr$(39) & .Rows(x).Cells(y).innerText
Else
ws.Cells(x + 4, y + 1).Value = .Rows(x).Cells(y).innerText
End If
Next y
Next x
End With
End Sub
Public Function GetNameAbbr(ByVal url As String) As String
Dim tempArr() As String
tempArr = Split(url, "/")
GetNameAbbr = Left$(tempArr(UBound(tempArr)), 6)
End Function
Public Function AddPlayerSheet(ByVal playerName As String) As Worksheet
Dim ws As Worksheet
If SheetExists(playerName) Then
Application.DisplayAlerts = False
ThisWorkbook.Worksheets(playerName).Delete
Application.DisplayAlerts = True
End If
Set ws = ThisWorkbook.Worksheets.Add
ws.Name = playerName
Set AddPlayerSheet = ws
End Function
Public Function SheetExists(ByVal playerName As String) As Boolean
SheetExists = Evaluate("ISREF('" & playerName & "'!A1)")
End Function
Public Sub FixTable(ByVal ws As Worksheet)
Dim found As Range, numSummaryRows As Long
With ws
Set found = .Columns("A").Find("Career")
If found Is Nothing Then Exit Sub
numSummaryRows = .Cells(.Rows.Count, "A").End(xlUp).Row - found.Row
numSummaryRows = IIf(numSummaryRows = 0, 1, numSummaryRows + 1)
Debug.Print found.Offset(, 1).Resize(numSummaryRows, 30).Address, ws.Name
found.Offset(, 1).Resize(numSummaryRows, 30).Copy found.Offset(, 2)
found.Offset(, 1).Resize(numSummaryRows, 1).ClearContents
End With
End Sub
测试工作表 1 中的链接:
示例网页:
对应的代码写出来: