提交带有嵌入结果的 HTML 网络表单后无法单击搜索结果元素 table - VBA 网络抓取

Cannot click search result elements after submitting HTML web form with embedded results table - VBA web scrape

我正在尝试从以下 URL 抓取数据:http://iswdataclient.azurewebsites.net/webSearchID.aspx?dbkey=parkercad。 我可以插入和查询一个 属性 ID,但是在加载搜索结果后,我无法成功点击结果 table 中的 "View Property" link。

我的初始调试表明该表单实际上并未提交,这意味着网页上不存在 link。但是,后续结果页面中的 HTML 显示了搜索结果的附加元素。我已尝试以下等待网页加载但未成功,但我认为这不是时间问题:

Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop

Do While oIE.ReadyState = 4: WScript.Sleep 100: Loop
Do While oIE.ReadyState <> 4: WScript.Sleep 100: Loop

Do While IE.ReadyState = 4: DoEvents: Loop   
Do Until IE.ReadyState = 4: DoEvents: Loop   

Application.Wait (Now + TimeValue("00:00:03"))

我已经分析了 HTML 多种方法,同时考虑到事件处理问题,从表单级别开始向下钻取:

Set ie = CreateObject("internetexplorer.application")
With ie
    .navigate "http://iswdataclient.azurewebsites.net/webProperty.aspx?dbkey=parkercad
    .Visible = True
    Do While .Busy Or .readyState <> 4
    DoEvents
    Loop
End With

For Each propid In Range(Cells(2, 8), Cells(2, 8)) 'Cells(Range("H" & Rows.Count).End(xlUp).Row, 8)) 'propid = R000001972
    If propid <> "N/A" Then 
    On Error Resume Next
        With ie.document.body
            For iFRM = 0 To .getElementsByTagName("form").Length - 1
                If .getElementsByTagName("form")(iFRM).ID = "searchForm" Then
                    With .getElementsByTagName("form")(iFRM)
                        For iNPT = 0 To .getElementsByTagName("input").Length - 1
                            Select Case .getElementsByTagName("input")(iNPT).Name
                                Case "ucSearchID$searchid"
                                    .getElementsByTagName("input")(iNPT).Value = propid
                                Case "ucSearchID$ButtonSearch"
                                    .getElementsByTagName("input")(iNPT).Click
                            End Select
                        Next iNPT
                            Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
                            Application.Wait (Now + TimeValue("00:00:02"))
                        Exit For
                        End With
           Exit For
                End If
           Next iFRM
       End With

以及所需元素的简单解析:

Set ie = CreateObject("internetexplorer.application")
With ie
    .navigate "http://iswdataclient.azurewebsites.net/webProperty.aspx?dbkey=parkercad
    .Visible = True
    Do While .Busy Or .readyState <> 4
    DoEvents
    Loop
End With

Set intags = ie.document.getElementsByTagName("input")
For Each intag In intags
    If intag.classname = "searchid" Then
        intag.Value = propid 
        Set evt = ie.document.createEvent("keyboardevent")
        evt.initEvent "change", True, False
        intag.dispatchEvent evt
    End If
Next intag

ie.document.getelementbyid("ucSearchID_ButtonSearch").Click
While ie.readyState <> 4 Or ie.Busy: DoEvents: Wend

以及 table 单元格的下钻,我删除了代码。虽然我认为可能存在事件处理问题,网页更新,但我无法从结果 table 中解析更新的 HTML。

Debug.Print ie.document.getelementbyid("lblResults").innerText

Debug.Print returns "Your search of ' ' returned 0 result(s)",而网页反映搜索成功,“您对 'R000001972' 的搜索返回了 1 个结果。所以,我的代码成功提交表单但未执行结果页面 "View Property" link 点击,因为它无法解析更新后的 HTML:

For at = 0 To ie.document.getElementsByTagName("a").Length - 1
    Select Case ie.document.getElementsByTagName("a")(at).ID
        Case "ucResultsGrid_" & propid
            ie.document.getElementsByTagName("a")(at).Click
    End Select
Next at

这似乎不是时间或事件处理问题。不确定如何进行。任何帮助将不胜感激。

这是一个 aspx 页面。您可以以简化的形式执行相同的 GET 和 POST 请求。我使用剪贴板写出样本表。您可以随意修改。

Option Explicit

Public Sub GetPropertyInfo()
    Dim html As MSHTML.HTMLDocument, xhr As Object

    Application.ScreenUpdating = False

    Set html = New MSHTML.HTMLDocument
    Set xhr = CreateObject("MSXML2.ServerXMLHTTP")

    Dim body As String, propertyId As String

    propertyId = "R000001972"

    With xhr
        .Open "GET", "http://iswdataclient.azurewebsites.net/webSearchID.aspx?dbkey=parkercad&stype=id&sdata=" & propertyId, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        html.body.innerHTML = .responseText
        If html.querySelectorAll("#dvPrimary table tr").Length <= 1 Then Exit Sub
        body = GetPostBody(html, propertyId)
        .Open "POST", "http://iswdataclient.azurewebsites.net/webProperty.aspx?dbkey=parkercad&stype=id&sdata=" _
                   & propertyId & "&id=" & propertyId, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send body
        html.body.innerHTML = .responseText
    End With

    Dim ws As Worksheet, clipboard As Object, i As Long

    Set ws = ThisWorkbook.Worksheets(1)
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")

    With ws.Cells
        .ClearContents
        .ClearFormats
    End With

    With html.querySelectorAll("table")
        For i = 8 To .Length - 1
            clipboard.SetText .Item(i).outerHTML
            clipboard.PutInClipboard
            ws.Range("A" & GetLastRow(ws) + 2).PasteSpecial
        Next
    End With
    Application.ScreenUpdating = True
End Sub

Public Function GetPostBody(ByVal html As MSHTML.HTMLDocument, ByVal propertyId As String) As String
    Dim i As Long, result As String

    With html.querySelectorAll("input[type=hidden]")
        For i = 0 To .Length - 1
            result = result & .Item(i).ID & "=" & .Item(i).Value & "&"
        Next
    End With
    result = result & "__EVENTTARGET=ucResultsGrid$" & propertyId
    GetPostBody = result
End Function

Public Function GetLastRow(ByVal sh As Worksheet) As Long
    On Error Resume Next
    GetLastRow = sh.Cells.Find(What:="*", _
                               After:=sh.Range("A1"), _
                               Lookat:=xlPart, _
                               LookIn:=xlFormulas, _
                               SearchOrder:=xlByRows, _
                               SearchDirection:=xlPrevious, _
                               MatchCase:=False).Row
    On Error GoTo 0
End Function

参考资料(VBE > 工具 > 参考资料):

  1. 微软HTML对象库