使用 Excel VBA 从网页中提取数据

Use Excel VBA to Extract Data From a Webpage

我正在尝试使用 Excel VBA 从网页 (https://www.churchofjesuschrist.org/maps/meetinghouses/lang=eng&q=1148+W+100+N) 中提取一些数据。我使用的代码将打开 Internet Explorer,导航到网站,然后提取最上面的结果。但我似乎无法弄清楚如何提取其余结果(即病房、语言、联系人姓名、联系人#)。想法?

Sub MeethinghouseLocator()

Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate Sheets("Sheet1").Range("A1").Value
IE.Visible = True
While IE.Busy
DoEvents
Wend

Application.Wait (Now + TimeValue("0:00:01"))

IE.document.querySelector("button.search-input__execute.button--primary").Click

  Dim Doc As HTMLDocument
  Set Doc = IE.document
        
Application.Wait (Now + TimeValue("0:00:01"))
        
'WardName
    Dim aaaaFONT As String
    aaaaFONT = Trim(Doc.getElementsByClassName("location-header__name ng-binding")(0).innerText)
    Sheets("Sheet1").Range("D6").Value = aaaaFONT
    
Application.Wait (Now + TimeValue("0:00:01"))
    
'Language
    Dim aaabFONT As String
    aaabFONT = Trim(Doc.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
    Sheets("Sheet1").Range("E6").Value = aaabFONT

'Click 1st Link
    IE.document.getElementsByClassName("location-header__name ng-binding")(0).Click

Application.Wait (Now + TimeValue("0:00:01"))

'Contact Name
    Dim aaacFONT As String
    aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
    Sheets("Sheet1").Range("H6").Value = aaacFONT

'Contact Name Function
    Range("F6").Select
    ActiveCell.FormulaR1C1 = _
        "=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"

'Contact Phone Number
    Dim aaadFONT As String
    aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
    Sheets("Sheet1").Range("G6").Value = aaadFONT

    
  IE.Quit

End Sub

你的大部分代码实际上都有效,所以我不确定你遇到了什么问题,但你没有考虑点击每个 link 后的加载,所以我添加了 While 循环在继续之前检查其 ReadyReadyState 属性。

编辑:代码现在遍历结果中列出的所有病房,想法是将第一个 IE 保留在结果页面并将病房的 URL 和输入行传递给子 ExtractWard 它将打开另一个 IE,导航到给定的 URL 并提取病房详细信息。

Sub MeethinghouseLocator()
   
    Dim IE As Object
    Set IE = CreateObject("InternetExplorer.Application")
    IE.navigate Sheets("Sheet1").Range("A1").Value
    IE.Visible = True
    While IE.Busy Or IE.readyState <> 4
        DoEvents
    Wend
            
    IE.document.querySelector("button.search-input__execute.button--primary").Click
    
    While IE.Busy Or IE.readyState <> 4
        DoEvents
    Wend
    Dim Doc As HTMLDocument
    Set Doc = IE.document
        
    Application.Wait (Now + TimeValue("0:00:01"))
        
    Dim wardContent As Object
    Set wardContent = Doc.getElementsByClassName("maps-card__content")(2)
    
    Dim wardCollection As Object
    Set wardCollection = wardContent.getElementsByClassName("location-header")
    
    Dim rowNum As Long
    rowNum = 6
        
    Dim i As Long
    For i = 0 To wardCollection.Length - 1
        With wardCollection(i)
            'WardName
            Dim aaaaFONT As String
            aaaaFONT = Trim(.getElementsByClassName("location-header__name ng-binding")(0).innerText)
            Sheets("Sheet1").Cells(rowNum, "D").Value = aaaaFONT
                
            'Language
            Dim aaabFONT As String
            aaabFONT = Trim(.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
            Sheets("Sheet1").Cells(rowNum, "E").Value = aaabFONT
        
            Dim wardURL As String
            wardURL = .getElementsByClassName("location-header__name ng-binding")(0).href
            
            ExtractWard wardURL, rowNum
        End With
        
        rowNum = rowNum + 1
    Next i
            
    Set Doc = Nothing
    IE.Quit
    Set IE = Nothing
End Sub

Private Sub ExtractWard(argURL As String, argRow As Long)
    Dim IE As Object
    Set IE = CreateObject("InternetExplorer.Application")
    IE.navigate argURL
    IE.Visible = True
    While IE.Busy Or IE.readyState <> 4
        DoEvents
    Wend
            
    Dim Doc As HTMLDocument
    Set Doc = IE.document
            
    'Contact Name
    Dim aaacFONT As String
    aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
    Sheets("Sheet1").Cells(argRow, "H").Value = aaacFONT
    
    'Contact Name Function
    Sheets("Sheet1").Cells(argRow, "F").FormulaR1C1 = _
        "=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"
    
    'Contact Phone Number
    Dim aaadFONT As String
    aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
    Sheets("Sheet1").Cells(argRow, "G").Value = aaadFONT
        
    Set Doc = Nothing
    IE.Quit
    Set IE = Nothing
End Sub