如何从限制为 100 行的雅虎财经下载数据
How to download data from Yahoo finance limited to 100 rows
所以我正在做这个项目,我必须从雅虎财经下载历史股票数据。得到这个代码。它工作正常,但最多只能下载 100 行。我试图在网上搜索答案或不同的代码(这个只是从 excel 录制的宏),但我在 YouTube 上看到一些使用他的解决方案的教程,而且很好。
..我当时没看懂
Sub Makro6()
' Dowload stock prices from Yahoo Finance based on input
Dim ws As Worksheet
Set ws = Sheets("Data")
'clear previous queries
For Each qr In ThisWorkbook.Queries
qr.Delete
Next qr
'clear Data sheet
ws.Select
Cells.Clear
'clear graphs
'ws.ChartObjects.Delete
'stock = Sheets("Main").Range("A2")
StartDate = toUnix(Sheets("Main").Range("A4"))
EndDate = toUnix(Sheets("Main").Range("A6"))
Application.CutCopyMode = False
ActiveWorkbook.Queries.Add Name:="Table 2 (3)", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Zdroj = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/" & stock & "/history?period1=" & StartDate & "&period2=" & EndDate & "&interval=1d&filter=history&frequency=1d""))," & Chr(13) & "" & Chr(10) & " Data2 = Zdroj{2}[Data]," & Chr(13) & "" & Chr(10) & " #""Změněný typ"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type text}, {""High"", type text}, {""Low"", type text}, {""Close*"", type tex" & _
"t}, {""Adj Close**"", type text}, {""Volume"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Změněný typ"""
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2 (3)"";Extended Properties=""""" _
, Destination:=Range("$A")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 2 (3)]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_2_3"
.Refresh BackgroundQuery:=False
End With
Sheets("Data").Select
'' Sort data by date from oldest to newest
ws.ListObjects("Table_2_3").Sort.SortFields. _
Clear
ws.ListObjects("Table_2_3").Sort.SortFields. _
Add2 Key:=Range("A1:A99"), SortOn:=xlSortOnValues, Order:=xlAscending, _
DataOption:=xlSortNormal
With ws.ListObjects("Table_2_3").Sort
.Header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
Call DeleteDividends
Call Stochastics
End Sub
该代码适用于其他网站。我尝试下载维基百科页面列表,共 120 个,加载数据没问题。
问题是雅虎财经网站的数据是项目需求
如果您检查该页面,您会发现 HTMLTable
行中最初只显示 100 个结果(准确地说是 tbody
)。
在浏览器元素选项卡搜索框中输入css选择器[data-test="historical-prices"] tbody tr
(F12打开开发工具)你会看到这个:
其余行是在您向下滚动页面时从数据存储动态提供的。当然,您当前的方法无法解决这些问题。实际上,您可以发出一个 xhr 请求,用正则表达式输出包含所有行的适当 javascript 对象,然后使用 json 解析器进行解析。
以下是您当前应该看到的大致回复:
我使用 jsonconverter.bas 作为我的 json 解析器。从 here 下载原始代码并添加到名为 jsonConverter
的标准模块中。然后您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft 脚本运行时的参考。
@TimWilliams 写了一个更好的 unix 转换函数 here 但我想我可以尝试写一些不同的东西。我会建议你坚持使用他的,因为它更安全、更快。
VBA:
Option Explicit
Public Sub GetYahooData()
'< VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, re As Object, s As String, xhr As Object, ws As Worksheet
Dim startDate As String, endDate As String, stock As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set re = CreateObject("VBScript.RegExp")
Set xhr = CreateObject("MSXML2.XMLHTTP")
stock = "AAPL"
startDate = "1534809600"
endDate = "1566345600"
With xhr
.Open "GET", "https://finance.yahoo.com/quote/" & stock & "/history?period1=" & startDate & "&period2=" & endDate & "&interval=1d&filter=history&frequency=1d&_guc_consent_skip=" & GetCurrentUnix(Now()), False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
End With
s = GetJsonString(re, s)
If s = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(s)
WriteOutResults ws, json
End Sub
Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal json As Object)
Dim item As Object, key As Variant, headers(), results(), r As Long, c As Long
headers = json.item(1).keys
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1: c = 1
For Each key In item.keys
results(r, c) = item(key)
c = c + 1
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "HistoricalPriceStore"":{""prices"":(.*?\])" 'regex pattern to get json string
If .test(responseText) Then
GetJsonString = .Execute(responseText)(0).SubMatches(0)
Else
GetJsonString = "No match"
End If
End With
End Function
Public Function GetCurrentUnix(ByVal t As Double) As String
With CreateObject("htmlfile")
.parentWindow.execScript "function GetTimeStamp(t){return new Date(t).getTime() / 1000}", "jscript"
GetCurrentUnix = .parentWindow.GetTimeStamp(Now)
End With
End Function
正则表达式:
Python:
我最初写成 python 如果有兴趣:
import requests, re, json
from bs4 import BeautifulSoup as bs
p = re.compile('HistoricalPriceStore":{"prices":(.*?\])')
r = requests.get('https://finance.yahoo.com/quote/AAPL/history?period1=1534809600&period2=1566345600&interval=1d&filter=history&frequency=1d&_guc_consent_skip=1566859607')
data = json.loads(p.findall(r.text)[0])
所以我正在做这个项目,我必须从雅虎财经下载历史股票数据。得到这个代码。它工作正常,但最多只能下载 100 行。我试图在网上搜索答案或不同的代码(这个只是从 excel 录制的宏),但我在 YouTube 上看到一些使用他的解决方案的教程,而且很好。
..我当时没看懂
Sub Makro6()
' Dowload stock prices from Yahoo Finance based on input
Dim ws As Worksheet
Set ws = Sheets("Data")
'clear previous queries
For Each qr In ThisWorkbook.Queries
qr.Delete
Next qr
'clear Data sheet
ws.Select
Cells.Clear
'clear graphs
'ws.ChartObjects.Delete
'stock = Sheets("Main").Range("A2")
StartDate = toUnix(Sheets("Main").Range("A4"))
EndDate = toUnix(Sheets("Main").Range("A6"))
Application.CutCopyMode = False
ActiveWorkbook.Queries.Add Name:="Table 2 (3)", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Zdroj = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/" & stock & "/history?period1=" & StartDate & "&period2=" & EndDate & "&interval=1d&filter=history&frequency=1d""))," & Chr(13) & "" & Chr(10) & " Data2 = Zdroj{2}[Data]," & Chr(13) & "" & Chr(10) & " #""Změněný typ"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type text}, {""High"", type text}, {""Low"", type text}, {""Close*"", type tex" & _
"t}, {""Adj Close**"", type text}, {""Volume"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Změněný typ"""
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2 (3)"";Extended Properties=""""" _
, Destination:=Range("$A")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 2 (3)]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_2_3"
.Refresh BackgroundQuery:=False
End With
Sheets("Data").Select
'' Sort data by date from oldest to newest
ws.ListObjects("Table_2_3").Sort.SortFields. _
Clear
ws.ListObjects("Table_2_3").Sort.SortFields. _
Add2 Key:=Range("A1:A99"), SortOn:=xlSortOnValues, Order:=xlAscending, _
DataOption:=xlSortNormal
With ws.ListObjects("Table_2_3").Sort
.Header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
Call DeleteDividends
Call Stochastics
End Sub
该代码适用于其他网站。我尝试下载维基百科页面列表,共 120 个,加载数据没问题。
问题是雅虎财经网站的数据是项目需求
如果您检查该页面,您会发现 HTMLTable
行中最初只显示 100 个结果(准确地说是 tbody
)。
在浏览器元素选项卡搜索框中输入css选择器[data-test="historical-prices"] tbody tr
(F12打开开发工具)你会看到这个:
其余行是在您向下滚动页面时从数据存储动态提供的。当然,您当前的方法无法解决这些问题。实际上,您可以发出一个 xhr 请求,用正则表达式输出包含所有行的适当 javascript 对象,然后使用 json 解析器进行解析。
以下是您当前应该看到的大致回复:
我使用 jsonconverter.bas 作为我的 json 解析器。从 here 下载原始代码并添加到名为 jsonConverter
的标准模块中。然后您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft 脚本运行时的参考。
@TimWilliams 写了一个更好的 unix 转换函数 here 但我想我可以尝试写一些不同的东西。我会建议你坚持使用他的,因为它更安全、更快。
VBA:
Option Explicit
Public Sub GetYahooData()
'< VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, re As Object, s As String, xhr As Object, ws As Worksheet
Dim startDate As String, endDate As String, stock As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set re = CreateObject("VBScript.RegExp")
Set xhr = CreateObject("MSXML2.XMLHTTP")
stock = "AAPL"
startDate = "1534809600"
endDate = "1566345600"
With xhr
.Open "GET", "https://finance.yahoo.com/quote/" & stock & "/history?period1=" & startDate & "&period2=" & endDate & "&interval=1d&filter=history&frequency=1d&_guc_consent_skip=" & GetCurrentUnix(Now()), False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
End With
s = GetJsonString(re, s)
If s = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(s)
WriteOutResults ws, json
End Sub
Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal json As Object)
Dim item As Object, key As Variant, headers(), results(), r As Long, c As Long
headers = json.item(1).keys
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1: c = 1
For Each key In item.keys
results(r, c) = item(key)
c = c + 1
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "HistoricalPriceStore"":{""prices"":(.*?\])" 'regex pattern to get json string
If .test(responseText) Then
GetJsonString = .Execute(responseText)(0).SubMatches(0)
Else
GetJsonString = "No match"
End If
End With
End Function
Public Function GetCurrentUnix(ByVal t As Double) As String
With CreateObject("htmlfile")
.parentWindow.execScript "function GetTimeStamp(t){return new Date(t).getTime() / 1000}", "jscript"
GetCurrentUnix = .parentWindow.GetTimeStamp(Now)
End With
End Function
正则表达式:
Python:
我最初写成 python 如果有兴趣:
import requests, re, json
from bs4 import BeautifulSoup as bs
p = re.compile('HistoricalPriceStore":{"prices":(.*?\])')
r = requests.get('https://finance.yahoo.com/quote/AAPL/history?period1=1534809600&period2=1566345600&interval=1d&filter=history&frequency=1d&_guc_consent_skip=1566859607')
data = json.loads(p.findall(r.text)[0])