使用 Excel VBA 加载与 IE11 不兼容的网站
Using Excel VBA to load a website that is incompatible with IE11
在 Excel VBA 中加载网站并将其放入 sheet 我一直在使用以下内容:
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE .navigate "https://www.wsj.com/market-data/bonds/treasuries"
然后我可以将其复制并粘贴到我的 Excel sheet 中。
但是这个网站已经不能用IE11了,ExcelVBA虽然IE11快要被淘汰了,但还是坚持用IE11
还有别的办法吗?我也看过:
Selenium:但对于 VBA(自 2016 年以来未更新),它似乎已经过时了,我无法在 VBA 中使用 Edge 或 Firefox ] 反正
AutoIt:我将网站的 HTML 代码写入 TXT 文件 (oHTTP = ObjCreate("winhttp.winhttprequest.5.1") ; $oHTTP.Open("GET", $URL1, False) ; $oHTTP.Send(); $oReceived = $oHTTP.ResponseText; FileWrite($file, $oReceived)) 但 txt 文件内容远非方便里面有无穷无尽的 HTML 东西。需要相当多的 VBA 代码来整理混乱,这可能意味着它以后不会可靠。另外考虑到我的工作簿的大小非常慢,将网站数据逐个元素复制到 sheet 中实际上需要几分钟。
肯定有一种简单的方法可以将网站或网站内的 table 加载到 Excel sheet 中?这一定是一条人迹罕至的道路,但经过大量谷歌搜索后,我找不到一个实际可行的简单解决方案。
我有 5-10 个网页正在加载到这个工作簿中,这似乎是一项全职工作,让整个工作正常进行!!任何thoughts/help非常感谢!!!
以下代码(不使用网络驱动程序)有效,但不是一个简单的解决方案。我能够找到存储在正文中的信息,这些信息通过使用 REGEX 隔离,然后存储到 JSON 文件中进行解析。
Dim XMLPage As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim strPattern As String: strPattern = "window.__STATE__ = ({.+}}}});"
Dim JSON As Object
Dim Key As Variant
Dim key1, key2 As String
XMLPage.Open "GET", "https://www.wsj.com/market-data/bonds/treasuries", False
XMLPage.send
Set JSON = JsonConverter.ParseJson(REGEX(XMLPage.responseText, strPattern, ""))
' Notes and Bonds
key1 = "mdc_treasury_{" & """" & "treasury" & """" & ":" & """" & "NOTES_AND_BONDS" & """" & "}"
For Each Key In JSON("data")(key1)("data")("data")("instruments")
Debug.Print Key("maturityDate")
Debug.Print Key("ask")
Debug.Print Key("askYield")
Debug.Print Key("bid")
Debug.Print Key("change")
Next Key
' Bills
key2 = "mdc_treasury_{" & """" & "treasury" & """" & ":" & """" & "BILLS" & """" & "}"
For Each Key In JSON("data")(key2)("data")("data")("instruments")
Debug.Print Key("maturityDate")
Debug.Print Key("ask")
Debug.Print Key("askYield")
Debug.Print Key("bid")
Debug.Print Key("change")
Next Key
需要将以下函数复制到模块中:
Function REGEX(strInput As String, matchPattern As String, Optional ByVal outputPattern As String = "[=11=]") As Variant
Dim inputRegexObj As New VBScript_RegExp_55.RegExp, outputRegexObj As New VBScript_RegExp_55.RegExp, outReplaceRegexObj As New VBScript_RegExp_55.RegExp
Dim inputMatches As Object, replaceMatches As Object, replaceMatch As Object
Dim replaceNumber As Integer
With inputRegexObj
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = matchPattern
End With
With outputRegexObj
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "$(\d+)"
End With
With outReplaceRegexObj
.Global = True
.MultiLine = True
.IgnoreCase = False
End With
Set inputMatches = inputRegexObj.Execute(strInput)
If inputMatches.Count = 0 Then
REGEX = False
Else
Set replaceMatches = outputRegexObj.Execute(outputPattern)
For Each replaceMatch In replaceMatches
replaceNumber = replaceMatch.SubMatches(0)
outReplaceRegexObj.Pattern = "$" & replaceNumber
If replaceNumber = 0 Then
outputPattern = outReplaceRegexObj.Replace(outputPattern, inputMatches(0).value)
Else
If replaceNumber > inputMatches(0).SubMatches.Count Then
'regex = "A to high $ tag found. Largest allowed is $" & inputMatches(0).SubMatches.Count & "."
REGEX = CVErr(xlErrValue)
Exit Function
Else
outputPattern = outReplaceRegexObj.Replace(outputPattern, inputMatches(0).SubMatches(replaceNumber - 1))
End If
End If
Next
REGEX = outputPattern
End If
End Function
以下资源会有所帮助:
How to use Regular Expressions (Regex) in Microsoft Excel both in-cell and loops
https://github.com/VBA-tools/VBA-JSON
您需要安装 JSON 转换器并在库中引用正则表达式。 REGEX 函数是在堆栈溢出的其他地方找到的,因此其他人应该得到它的功劳。
与 Christopher 在使用正则表达式时的回答类似的想法。我正在获取仪器数据(JS 数组),将组件字典拆分出来(减去末尾 }
),然后使用基于 headers 的正则表达式来获取适当的值。
我使用字典来处理 input/output headers,并设置了几个请求 headers 以帮助发出基于浏览器的请求并减轻缓存结果的服务。
理想情况下,可以使用 html 解析器并获取 script
标记,然后在 JavaScript object 上使用 json 解析器script
标签。
如果您想要来自其他选项卡式结果的数据,我可以通过显式设置 re.Global = True
来添加它,然后循环返回的匹配项。取决于您是否想要这些以及您希望它们如何出现在 sheet(s).
中
我目前将结果写入名为 Treasury Notes & Bonds
的 sheet。
Option Explicit
Public Sub GetTradeData()
Dim s As String, http As MSXML2.XMLHTTP60 'required reference Microsoft XML v6,
Set http = New MSXML2.XMLHTTP60
With http
.Open "GET", "https://www.wsj.com/market-data/bonds/treasuries", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
s = .responseText
End With
Dim re As VBScript_RegExp_55.RegExp 'required reference Microsoft VBScript Regular Expressions
Set re = New VBScript_RegExp_55.RegExp
re.Pattern = "instruments"":\[(.*?)\]"
s = re.Execute(s)(0).SubMatches(0)
Dim headers() As Variant, r As Long, c As Long, mappingDict As Scripting.Dictionary 'required reference Microsoft Scripting Runtime
Set mappingDict = New Scripting.Dictionary
mappingDict.Add "maturityDate", "MATURITY"
mappingDict.Add "coupon", "COUPON"
mappingDict.Add "bid", "BID"
mappingDict.Add "ask", "ASKED"
mappingDict.Add "change", "CHG"
mappingDict.Add "askYield", "ASKED YIELD"
headers = mappingDict.keys
Dim results() As String, output() As Variant, key As Variant
results = Split(s, "}")
ReDim output(1 To UBound(results), 1 To UBound(headers) + 1)
For r = LBound(results) To UBound(results) - 1
c = 1
For Each key In mappingDict.keys
re.Pattern = "" & key & """:""(.*?)"""
output(r + 1, c) = re.Execute(results(r))(0).SubMatches(0)
c = c + 1
Next
Next
re.Pattern = "timestamp"":""(.*?)"""
re.Global = True
With ThisWorkbook.Worksheets("Treasury Notes & Bonds")
.UsedRange.ClearContents
Dim matches As VBScript_RegExp_55.MatchCollection
Set matches = re.Execute(http.responseText)
.Cells(1, 1) = matches(matches.Count - 1).SubMatches(0)
.Cells(2, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(3, 1).Resize(UBound(output, 1), UBound(output, 2)) = output
End With
End Sub
在 Excel VBA 中加载网站并将其放入 sheet 我一直在使用以下内容:
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE .navigate "https://www.wsj.com/market-data/bonds/treasuries"
然后我可以将其复制并粘贴到我的 Excel sheet 中。 但是这个网站已经不能用IE11了,ExcelVBA虽然IE11快要被淘汰了,但还是坚持用IE11
还有别的办法吗?我也看过:
Selenium:但对于 VBA(自 2016 年以来未更新),它似乎已经过时了,我无法在 VBA 中使用 Edge 或 Firefox ] 反正
AutoIt:我将网站的 HTML 代码写入 TXT 文件 (oHTTP = ObjCreate("winhttp.winhttprequest.5.1") ; $oHTTP.Open("GET", $URL1, False) ; $oHTTP.Send(); $oReceived = $oHTTP.ResponseText; FileWrite($file, $oReceived)) 但 txt 文件内容远非方便里面有无穷无尽的 HTML 东西。需要相当多的 VBA 代码来整理混乱,这可能意味着它以后不会可靠。另外考虑到我的工作簿的大小非常慢,将网站数据逐个元素复制到 sheet 中实际上需要几分钟。
肯定有一种简单的方法可以将网站或网站内的 table 加载到 Excel sheet 中?这一定是一条人迹罕至的道路,但经过大量谷歌搜索后,我找不到一个实际可行的简单解决方案。
我有 5-10 个网页正在加载到这个工作簿中,这似乎是一项全职工作,让整个工作正常进行!!任何thoughts/help非常感谢!!!
以下代码(不使用网络驱动程序)有效,但不是一个简单的解决方案。我能够找到存储在正文中的信息,这些信息通过使用 REGEX 隔离,然后存储到 JSON 文件中进行解析。
Dim XMLPage As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim strPattern As String: strPattern = "window.__STATE__ = ({.+}}}});"
Dim JSON As Object
Dim Key As Variant
Dim key1, key2 As String
XMLPage.Open "GET", "https://www.wsj.com/market-data/bonds/treasuries", False
XMLPage.send
Set JSON = JsonConverter.ParseJson(REGEX(XMLPage.responseText, strPattern, ""))
' Notes and Bonds
key1 = "mdc_treasury_{" & """" & "treasury" & """" & ":" & """" & "NOTES_AND_BONDS" & """" & "}"
For Each Key In JSON("data")(key1)("data")("data")("instruments")
Debug.Print Key("maturityDate")
Debug.Print Key("ask")
Debug.Print Key("askYield")
Debug.Print Key("bid")
Debug.Print Key("change")
Next Key
' Bills
key2 = "mdc_treasury_{" & """" & "treasury" & """" & ":" & """" & "BILLS" & """" & "}"
For Each Key In JSON("data")(key2)("data")("data")("instruments")
Debug.Print Key("maturityDate")
Debug.Print Key("ask")
Debug.Print Key("askYield")
Debug.Print Key("bid")
Debug.Print Key("change")
Next Key
需要将以下函数复制到模块中:
Function REGEX(strInput As String, matchPattern As String, Optional ByVal outputPattern As String = "[=11=]") As Variant
Dim inputRegexObj As New VBScript_RegExp_55.RegExp, outputRegexObj As New VBScript_RegExp_55.RegExp, outReplaceRegexObj As New VBScript_RegExp_55.RegExp
Dim inputMatches As Object, replaceMatches As Object, replaceMatch As Object
Dim replaceNumber As Integer
With inputRegexObj
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = matchPattern
End With
With outputRegexObj
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "$(\d+)"
End With
With outReplaceRegexObj
.Global = True
.MultiLine = True
.IgnoreCase = False
End With
Set inputMatches = inputRegexObj.Execute(strInput)
If inputMatches.Count = 0 Then
REGEX = False
Else
Set replaceMatches = outputRegexObj.Execute(outputPattern)
For Each replaceMatch In replaceMatches
replaceNumber = replaceMatch.SubMatches(0)
outReplaceRegexObj.Pattern = "$" & replaceNumber
If replaceNumber = 0 Then
outputPattern = outReplaceRegexObj.Replace(outputPattern, inputMatches(0).value)
Else
If replaceNumber > inputMatches(0).SubMatches.Count Then
'regex = "A to high $ tag found. Largest allowed is $" & inputMatches(0).SubMatches.Count & "."
REGEX = CVErr(xlErrValue)
Exit Function
Else
outputPattern = outReplaceRegexObj.Replace(outputPattern, inputMatches(0).SubMatches(replaceNumber - 1))
End If
End If
Next
REGEX = outputPattern
End If
End Function
以下资源会有所帮助:
How to use Regular Expressions (Regex) in Microsoft Excel both in-cell and loops
https://github.com/VBA-tools/VBA-JSON
您需要安装 JSON 转换器并在库中引用正则表达式。 REGEX 函数是在堆栈溢出的其他地方找到的,因此其他人应该得到它的功劳。
与 Christopher 在使用正则表达式时的回答类似的想法。我正在获取仪器数据(JS 数组),将组件字典拆分出来(减去末尾 }
),然后使用基于 headers 的正则表达式来获取适当的值。
我使用字典来处理 input/output headers,并设置了几个请求 headers 以帮助发出基于浏览器的请求并减轻缓存结果的服务。
理想情况下,可以使用 html 解析器并获取 script
标记,然后在 JavaScript object 上使用 json 解析器script
标签。
如果您想要来自其他选项卡式结果的数据,我可以通过显式设置 re.Global = True
来添加它,然后循环返回的匹配项。取决于您是否想要这些以及您希望它们如何出现在 sheet(s).
我目前将结果写入名为 Treasury Notes & Bonds
的 sheet。
Option Explicit
Public Sub GetTradeData()
Dim s As String, http As MSXML2.XMLHTTP60 'required reference Microsoft XML v6,
Set http = New MSXML2.XMLHTTP60
With http
.Open "GET", "https://www.wsj.com/market-data/bonds/treasuries", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
s = .responseText
End With
Dim re As VBScript_RegExp_55.RegExp 'required reference Microsoft VBScript Regular Expressions
Set re = New VBScript_RegExp_55.RegExp
re.Pattern = "instruments"":\[(.*?)\]"
s = re.Execute(s)(0).SubMatches(0)
Dim headers() As Variant, r As Long, c As Long, mappingDict As Scripting.Dictionary 'required reference Microsoft Scripting Runtime
Set mappingDict = New Scripting.Dictionary
mappingDict.Add "maturityDate", "MATURITY"
mappingDict.Add "coupon", "COUPON"
mappingDict.Add "bid", "BID"
mappingDict.Add "ask", "ASKED"
mappingDict.Add "change", "CHG"
mappingDict.Add "askYield", "ASKED YIELD"
headers = mappingDict.keys
Dim results() As String, output() As Variant, key As Variant
results = Split(s, "}")
ReDim output(1 To UBound(results), 1 To UBound(headers) + 1)
For r = LBound(results) To UBound(results) - 1
c = 1
For Each key In mappingDict.keys
re.Pattern = "" & key & """:""(.*?)"""
output(r + 1, c) = re.Execute(results(r))(0).SubMatches(0)
c = c + 1
Next
Next
re.Pattern = "timestamp"":""(.*?)"""
re.Global = True
With ThisWorkbook.Worksheets("Treasury Notes & Bonds")
.UsedRange.ClearContents
Dim matches As VBScript_RegExp_55.MatchCollection
Set matches = re.Execute(http.responseText)
.Cells(1, 1) = matches(matches.Count - 1).SubMatches(0)
.Cells(2, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(3, 1).Resize(UBound(output, 1), UBound(output, 2)) = output
End With
End Sub