如何使用 VBA 获取 Excel 第一页的 Google 搜索结果片段
How can I get Google search result snippets of first page in Excel using VBA
我在 A1:A1000
中有一个包含 1000 个关键字的列表。我想在每个关键字的相应单元格中获取第一页的 Google 搜索结果片段。例如:A1
单元格的搜索结果片段应在 B1
...*1
中,依此类推。非常感谢任何帮助。
考虑以下示例:
Option Explicit
Const TargetItemsQty = 30 ' results for each keyword
Sub GWebSearchIECtl()
Dim objSheet As Worksheet
Dim objIE As Object
Dim x As Long
Dim y As Long
Dim strSearch As String
Dim lngFound As Long
Dim st As String
Dim colGItems As Object
Dim varGItem As Variant
Dim strHLink As String
Dim strDescr As String
Dim strNextURL As String
Set objSheet = Sheets("Sheet1")
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Visible = True ' for debug or captcha request cases
y = 1 ' start searching for the keyword in the first row
With objSheet
.Select
.Range(.Columns("B:B"), .Columns("B:B").End(xlToRight)).Delete ' clear previous results
.Range("A1").Select
Do Until .Cells(y, 1) = ""
x = 2 ' start writing results from column B
.Cells(y, 1).Select
strSearch = .Cells(y, 1) ' current keyword
With objIE
lngFound = 0
.Navigate "https://www.google.com/search?q=" & EncodeUriComponent(strSearch) ' go to first search results page
Do
Do While .Busy Or Not .readyState = 4: DoEvents: Loop ' wait IE
Do Until .document.readyState = "complete": DoEvents: Loop ' wait document
Do While TypeName(.document.getElementById("res")) = "Null": DoEvents: Loop ' wait [#res] element
Set colGItems = .document.getElementById("res").getElementsByClassName("g") ' collection of search result [.g] items
For Each varGItem In colGItems ' process each item in collection
If varGItem.getElementsByTagName("a").Length > 0 And varGItem.getElementsByClassName("st").Length > 0 Then ' must have hyperlink and description
strHLink = varGItem.getElementsByTagName("a")(0).href ' get first hyperlink [a] found in current item
strDescr = GetInnerText(varGItem.getElementsByClassName("st")(0).innerHTML) ' get first description [span.st] found in current item
lngFound = lngFound + 1
With objSheet ' put result into cell
.Hyperlinks.Add .Cells(y, x), strHLink, , , strDescr
.Cells(y, x).WrapText = True
x = x + 1 ' next column
End With
If lngFound = TargetItemsQty Then Exit Do ' continue with next keyword - necessary quantity of the results for current keyword found
End If
DoEvents
Next
If TypeName(.document.getElementById("pnnext")) = "Null" Then Exit Do ' continue with next keyword - no [a#pnnext.pn] next page button exists
strNextURL = .document.getElementById("pnnext").href ' get next page url
.Navigate strNextURL ' go to next search results page
Loop
End With
y = y + 1 ' next row
Loop
End With
objIE.Quit
' google web search page contains the elements:
' [div#res] - main search results block
' [div.g] - each result item block within [div#res]
' [a] - hyperlink ancor(s) within each [div.g]
' [span.st] - description(s) within each [div.g]
' [a#pnnext.pn] - hyperlink ancor to the next search results page
End Sub
Function EncodeUriComponent(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function
Function GetInnerText(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.Open
objHtmlfile.Write "<body></body>"
End If
objHtmlfile.body.innerHTML = strText
GetInnerText = objHtmlfile.body.innerText
End Function
我在 A1:A1000
中有一个包含 1000 个关键字的列表。我想在每个关键字的相应单元格中获取第一页的 Google 搜索结果片段。例如:A1
单元格的搜索结果片段应在 B1
...*1
中,依此类推。非常感谢任何帮助。
考虑以下示例:
Option Explicit
Const TargetItemsQty = 30 ' results for each keyword
Sub GWebSearchIECtl()
Dim objSheet As Worksheet
Dim objIE As Object
Dim x As Long
Dim y As Long
Dim strSearch As String
Dim lngFound As Long
Dim st As String
Dim colGItems As Object
Dim varGItem As Variant
Dim strHLink As String
Dim strDescr As String
Dim strNextURL As String
Set objSheet = Sheets("Sheet1")
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Visible = True ' for debug or captcha request cases
y = 1 ' start searching for the keyword in the first row
With objSheet
.Select
.Range(.Columns("B:B"), .Columns("B:B").End(xlToRight)).Delete ' clear previous results
.Range("A1").Select
Do Until .Cells(y, 1) = ""
x = 2 ' start writing results from column B
.Cells(y, 1).Select
strSearch = .Cells(y, 1) ' current keyword
With objIE
lngFound = 0
.Navigate "https://www.google.com/search?q=" & EncodeUriComponent(strSearch) ' go to first search results page
Do
Do While .Busy Or Not .readyState = 4: DoEvents: Loop ' wait IE
Do Until .document.readyState = "complete": DoEvents: Loop ' wait document
Do While TypeName(.document.getElementById("res")) = "Null": DoEvents: Loop ' wait [#res] element
Set colGItems = .document.getElementById("res").getElementsByClassName("g") ' collection of search result [.g] items
For Each varGItem In colGItems ' process each item in collection
If varGItem.getElementsByTagName("a").Length > 0 And varGItem.getElementsByClassName("st").Length > 0 Then ' must have hyperlink and description
strHLink = varGItem.getElementsByTagName("a")(0).href ' get first hyperlink [a] found in current item
strDescr = GetInnerText(varGItem.getElementsByClassName("st")(0).innerHTML) ' get first description [span.st] found in current item
lngFound = lngFound + 1
With objSheet ' put result into cell
.Hyperlinks.Add .Cells(y, x), strHLink, , , strDescr
.Cells(y, x).WrapText = True
x = x + 1 ' next column
End With
If lngFound = TargetItemsQty Then Exit Do ' continue with next keyword - necessary quantity of the results for current keyword found
End If
DoEvents
Next
If TypeName(.document.getElementById("pnnext")) = "Null" Then Exit Do ' continue with next keyword - no [a#pnnext.pn] next page button exists
strNextURL = .document.getElementById("pnnext").href ' get next page url
.Navigate strNextURL ' go to next search results page
Loop
End With
y = y + 1 ' next row
Loop
End With
objIE.Quit
' google web search page contains the elements:
' [div#res] - main search results block
' [div.g] - each result item block within [div#res]
' [a] - hyperlink ancor(s) within each [div.g]
' [span.st] - description(s) within each [div.g]
' [a#pnnext.pn] - hyperlink ancor to the next search results page
End Sub
Function EncodeUriComponent(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function
Function GetInnerText(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.Open
objHtmlfile.Write "<body></body>"
End If
objHtmlfile.body.innerHTML = strText
GetInnerText = objHtmlfile.body.innerText
End Function