使用 Excel VBA 加载与 IE11 不兼容的网站

Using Excel VBA to load a website that is incompatible with IE11

在 Excel VBA 中加载网站并将其放入 sheet 我一直在使用以下内容:

Dim IE As Object  
Set IE = CreateObject("InternetExplorer.Application")  
IE .navigate "https://www.wsj.com/market-data/bonds/treasuries"

然后我可以将其复制并粘贴到我的 Excel sheet 中。 但是这个网站已经不能用IE11了,ExcelVBA虽然IE11快要被淘汰了,但还是坚持用IE11

还有别的办法吗?我也看过:

肯定有一种简单的方法可以将网站或网站内的 table 加载到 Excel sheet 中?这一定是一条人迹罕至的道路,但经过大量谷歌搜索后,我找不到一个实际可行的简单解决方案。

我有 5-10 个网页正在加载到这个工作簿中,这似乎是一项全职工作,让整个工作正常进行!!任何thoughts/help非常感谢!!!

以下代码(不使用网络驱动程序)有效,但不是一个简单的解决方案。我能够找到存储在正文中的信息,这些信息通过使用 REGEX 隔离,然后存储到 JSON 文件中进行解析。

Dim XMLPage As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim strPattern As String: strPattern = "window.__STATE__ = ({.+}}}});"
Dim JSON As Object
Dim Key As Variant
Dim key1, key2 As String

XMLPage.Open "GET", "https://www.wsj.com/market-data/bonds/treasuries", False
XMLPage.send

Set JSON = JsonConverter.ParseJson(REGEX(XMLPage.responseText, strPattern, ""))

' Notes and Bonds
key1 = "mdc_treasury_{" & """" & "treasury" & """" & ":" & """" & "NOTES_AND_BONDS" & """" & "}"

For Each Key In JSON("data")(key1)("data")("data")("instruments")
    Debug.Print Key("maturityDate")
    Debug.Print Key("ask")
    Debug.Print Key("askYield")
    Debug.Print Key("bid")
    Debug.Print Key("change")
Next Key

 ' Bills
key2 = "mdc_treasury_{" & """" & "treasury" & """" & ":" & """" & "BILLS" & """" & "}"

For Each Key In JSON("data")(key2)("data")("data")("instruments")
    Debug.Print Key("maturityDate")
    Debug.Print Key("ask")
    Debug.Print Key("askYield")
    Debug.Print Key("bid")
    Debug.Print Key("change")
Next Key

需要将以下函数复制到模块中:

Function REGEX(strInput As String, matchPattern As String, Optional ByVal outputPattern As String = "[=11=]") As Variant
    Dim inputRegexObj As New VBScript_RegExp_55.RegExp, outputRegexObj As New VBScript_RegExp_55.RegExp, outReplaceRegexObj As New VBScript_RegExp_55.RegExp
    Dim inputMatches As Object, replaceMatches As Object, replaceMatch As Object
    Dim replaceNumber As Integer

    With inputRegexObj
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = matchPattern
    End With
    With outputRegexObj
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "$(\d+)"
    End With
    With outReplaceRegexObj
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
    End With

    Set inputMatches = inputRegexObj.Execute(strInput)
    If inputMatches.Count = 0 Then
        REGEX = False
    Else
        Set replaceMatches = outputRegexObj.Execute(outputPattern)
        For Each replaceMatch In replaceMatches
            replaceNumber = replaceMatch.SubMatches(0)
            outReplaceRegexObj.Pattern = "$" & replaceNumber

            If replaceNumber = 0 Then
                outputPattern = outReplaceRegexObj.Replace(outputPattern, inputMatches(0).value)
            Else
                If replaceNumber > inputMatches(0).SubMatches.Count Then
                    'regex = "A to high $ tag found. Largest allowed is $" & inputMatches(0).SubMatches.Count & "."
                    REGEX = CVErr(xlErrValue)
                    Exit Function
                Else
                    outputPattern = outReplaceRegexObj.Replace(outputPattern, inputMatches(0).SubMatches(replaceNumber - 1))
                End If
            End If
        Next
        REGEX = outputPattern
    End If
End Function

以下资源会有所帮助:

How to use Regular Expressions (Regex) in Microsoft Excel both in-cell and loops

https://github.com/VBA-tools/VBA-JSON

您需要安装 JSON 转换器并在库中引用正则表达式。 REGEX 函数是在堆栈溢出的其他地方找到的,因此其他人应该得到它的功劳。

与 Christopher 在使用正则表达式时的回答类似的想法。我正在获取仪器数据(JS 数组),将组件字典拆分出来(减去末尾 }),然后使用基于 headers 的正则表达式来获取适当的值。

我使用字典来处理 input/output headers,并设置了几个请求 headers 以帮助发出基于浏览器的请求并减轻缓存结果的服务。

理想情况下,可以使用 html 解析器并获取 script 标记,然后在 JavaScript object 上使用 json 解析器script 标签。

如果您想要来自其他选项卡式结果的数据,我可以通过显式设置 re.Global = True 来添加它,然后循环返回的匹配项。取决于您是否想要这些以及您希望它们如何出现在 sheet(s).

我目前将结果写入名为 Treasury Notes & Bonds 的 sheet。


Option Explicit

Public Sub GetTradeData()
    Dim s As String, http As MSXML2.XMLHTTP60 'required reference Microsoft XML v6,
    
    Set http = New MSXML2.XMLHTTP60

    With http
        .Open "GET", "https://www.wsj.com/market-data/bonds/treasuries", False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
        .send
        s = .responseText
    End With
    
    Dim re As VBScript_RegExp_55.RegExp 'required reference Microsoft VBScript Regular Expressions
    
    Set re = New VBScript_RegExp_55.RegExp
    re.Pattern = "instruments"":\[(.*?)\]"
    s = re.Execute(s)(0).SubMatches(0)
    
    Dim headers() As Variant, r As Long, c As Long, mappingDict As Scripting.Dictionary 'required reference Microsoft Scripting Runtime
    
    Set mappingDict = New Scripting.Dictionary
    mappingDict.Add "maturityDate", "MATURITY"
    mappingDict.Add "coupon", "COUPON"
    mappingDict.Add "bid", "BID"
    mappingDict.Add "ask", "ASKED"
    mappingDict.Add "change", "CHG"
    mappingDict.Add "askYield", "ASKED YIELD"
    
    headers = mappingDict.keys
    
    Dim results() As String, output() As Variant, key As Variant
    
    results = Split(s, "}")
    ReDim output(1 To UBound(results), 1 To UBound(headers) + 1)
    
    For r = LBound(results) To UBound(results) - 1
        c = 1
        For Each key In mappingDict.keys
            re.Pattern = "" & key & """:""(.*?)"""
            output(r + 1, c) = re.Execute(results(r))(0).SubMatches(0)
            c = c + 1
        Next
    Next
    
    re.Pattern = "timestamp"":""(.*?)"""
    re.Global = True
    
    With ThisWorkbook.Worksheets("Treasury Notes & Bonds")
        
        .UsedRange.ClearContents
        
         Dim matches As VBScript_RegExp_55.MatchCollection
         
         Set matches = re.Execute(http.responseText)
        .Cells(1, 1) = matches(matches.Count - 1).SubMatches(0)
        .Cells(2, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(3, 1).Resize(UBound(output, 1), UBound(output, 2)) = output
    End With
End Sub