从 yahoo finance 的 html table 中提取数据 | Excel VBA 网页抓取

extract figures from a html table out of yahoo finance | Excel VBA webscraping

您好,我需要有关此代码的帮助,我正在尝试从此页面“收入历史记录”块中提取数据: https://finance.yahoo.com/quote/MSFT/analysis?p=MSFT

已经坐了 4 个小时并为此感到沮丧。非常感谢任何 help/hint!

问候米兰

Dim objIE As InternetExplorer
Dim aEle As HTMLDocument
Dim y As Long
Dim x As String
Dim lastrow As Long

Set objIE = New InternetExplorer

objIE.Visible = True

lastrow = Sheets("Table5").usedrange.Row - 1 + Sheets("Table5").usedrange.Rows.Count


For y = 11 To lastrow Step 2

x = Sheets("Table5").Range("A" & y).Value
If x = "" Then
Exit Sub
Else
objIE.navigate "https://finance.yahoo.com/quote/" & x & "/analysis?p=" & x

Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

        For Each aEle In objIE.document.getElementsByClassName("BdT Bdc($seperatorColor)")
    
        'On Error Resume Next
        If InStr(aEle.innerText, "EPS Actual") > 0 Then
        Sheets("Table5").Range("T" & y).Value = aEle.Children(4).innerText
        Sheets("Table5").Range("U" & y).Value = aEle.Children(3).innerText
        Sheets("Table5").Range("V" & y).Value = aEle.Children(2).innerText
        Sheets("Table5").Range("W" & y).Value = aEle.Children(1).innerText

        Exit For
        End If
        Next
       
    
End If
Next y
End Sub

我喜欢使用来自 get hub 的 VBA Seleniumbasic Chrome 驱动程序的建议。我发现它更容易使用。然后,您可以使用 Chrome 扩展 RUTO XPath Finder,查找页面上元素的 XPath 和 table 引用。然后只需引用 xpath。两者都可以在没有管理员权限的情况下安装。

您可以使用 xmlhttp 请求从该网站的 table Earnings History 行中获取第 EPS Actua 行的内容。你不需要去 IE。试一试。

Sub FetchValue()
    Const Url$ = "https://finance.yahoo.com/quote/MSFT/analysis?p=MSFT&guccounter=1"
    Dim elem As Object, tRow As Object, S$, R&

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", Url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
        .Send
        S = .responseText
    End With
    
    With CreateObject("HTMLFile")
        .write S
        For Each elem In .getElementsByTagName("tr")
            If InStr(elem.innerText, "EPS Actual") > 0 Then
                R = R + 1: Cells(R, 1) = elem.Children(1).innerText
                Cells(R, 2) = elem.Children(2).innerText
                Cells(R, 3) = elem.Children(3).innerText
                Cells(R, 4) = elem.Children(4).innerText
                Exit For
            End If
        Next elem
    End With
End Sub