如何从限制为 100 行的雅虎财经下载数据

How to download data from Yahoo finance limited to 100 rows

所以我正在做这个项目,我必须从雅虎财经下载历史股票数据。得到这个代码。它工作正常,但最多只能下载 100 行。我试图在网上搜索答案或不同的代码(这个只是从 excel 录制的宏),但我在 YouTube 上看到一些使用他的解决方案的教程,而且很好。

..我当时没看懂

Sub Makro6()

    ' Dowload stock prices from Yahoo Finance based on input

    Dim ws As Worksheet

    Set ws = Sheets("Data")

    'clear previous queries
    For Each qr In ThisWorkbook.Queries
        qr.Delete
    Next qr

    'clear Data sheet
    ws.Select
    Cells.Clear

    'clear graphs
    'ws.ChartObjects.Delete

    'stock = Sheets("Main").Range("A2")

    StartDate = toUnix(Sheets("Main").Range("A4"))
    EndDate = toUnix(Sheets("Main").Range("A6"))

    Application.CutCopyMode = False
    ActiveWorkbook.Queries.Add Name:="Table 2 (3)", Formula:= _
        "let" & Chr(13) & "" & Chr(10) & "    Zdroj = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/" & stock & "/history?period1=" & StartDate & "&period2=" & EndDate & "&interval=1d&filter=history&frequency=1d""))," & Chr(13) & "" & Chr(10) & "    Data2 = Zdroj{2}[Data]," & Chr(13) & "" & Chr(10) & "    #""Změněný typ"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type text}, {""High"", type text}, {""Low"", type text}, {""Close*"", type tex" & _
        "t}, {""Adj Close**"", type text}, {""Volume"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & "    #""Změněný typ"""
    With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
        "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2 (3)"";Extended Properties=""""" _
        , Destination:=Range("$A")).QueryTable
        .CommandType = xlCmdSql
        .CommandText = Array("SELECT * FROM [Table 2 (3)]")
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .PreserveColumnInfo = True
        .ListObject.DisplayName = "Table_2_3"
        .Refresh BackgroundQuery:=False
    End With
    Sheets("Data").Select

    '' Sort data by date from oldest to newest

    ws.ListObjects("Table_2_3").Sort.SortFields. _
        Clear
    ws.ListObjects("Table_2_3").Sort.SortFields. _
        Add2 Key:=Range("A1:A99"), SortOn:=xlSortOnValues, Order:=xlAscending, _
        DataOption:=xlSortNormal
    With ws.ListObjects("Table_2_3").Sort
        .Header = xlYes
        .MatchCase = False
        .Orientation = xlTopToBottom
        .SortMethod = xlPinYin
        .Apply
    End With

    Call DeleteDividends

    Call Stochastics


End Sub

该代码适用于其他网站。我尝试下载维基百科页面列表,共 120 个,加载数据没问题。

问题是雅虎财经网站的数据是项目需求

如果您检查该页面,您会发现 HTMLTable 行中最初只显示 100 个结果(准确地说是 tbody)。

在浏览器元素选项卡搜索框中输入css选择器[data-test="historical-prices"] tbody trF12打开开发工具)你会看到这个:

其余行是在您向下滚动页面时从数据存储动态提供的。当然,您当前的方法无法解决这些问题。实际上,您可以发出一个 xhr 请求,用正则表达式输出包含所有行的适当 javascript 对象,然后使用 json 解析器进行解析。

以下是您当前应该看到的大致回复:

我使用 jsonconverter.bas 作为我的 json 解析器。从 here 下载原始代码并添加到名为 jsonConverter 的标准模块中。然后您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft 脚本运行时的参考。

@TimWilliams 写了一个更好的 unix 转换函数 here 但我想我可以尝试写一些不同的东西。我会建议你坚持使用他的,因为它更安全、更快。


VBA:

Option Explicit  
Public Sub GetYahooData()
    '<  VBE > Tools > References > Microsoft Scripting Runtime
    Dim json As Object, re As Object, s As String, xhr As Object, ws As Worksheet
    Dim startDate As String, endDate As String, stock As String

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set re = CreateObject("VBScript.RegExp")
    Set xhr = CreateObject("MSXML2.XMLHTTP")

    stock = "AAPL"
    startDate = "1534809600" 
    endDate = "1566345600"     

    With xhr
        .Open "GET", "https://finance.yahoo.com/quote/" & stock & "/history?period1=" & startDate & "&period2=" & endDate & "&interval=1d&filter=history&frequency=1d&_guc_consent_skip=" & GetCurrentUnix(Now()), False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        s = .responseText
    End With

    s = GetJsonString(re, s)
    If s = "No match" Then Exit Sub

    Set json = JsonConverter.ParseJson(s)
    WriteOutResults ws, json
End Sub

Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal json As Object)
    Dim item As Object, key As Variant, headers(), results(), r As Long, c As Long

    headers = json.item(1).keys
    ReDim results(1 To json.Count, 1 To UBound(headers) + 1)

    For Each item In json
        r = r + 1: c = 1
        For Each key In item.keys
            results(r, c) = item(key)
            c = c + 1
        Next
    Next
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With      
End Sub

Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
    With re
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "HistoricalPriceStore"":{""prices"":(.*?\])" 'regex pattern to get json string
        If .test(responseText) Then
            GetJsonString = .Execute(responseText)(0).SubMatches(0)
        Else
            GetJsonString = "No match"
        End If
    End With
End Function

Public Function GetCurrentUnix(ByVal t As Double) As String
    With CreateObject("htmlfile")
        .parentWindow.execScript "function GetTimeStamp(t){return new Date(t).getTime() / 1000}", "jscript"
        GetCurrentUnix = .parentWindow.GetTimeStamp(Now)
    End With
End Function

正则表达式:


Python:

我最初写成 python 如果有兴趣:

import requests, re, json
from bs4 import BeautifulSoup as bs

p = re.compile('HistoricalPriceStore":{"prices":(.*?\])')
r = requests.get('https://finance.yahoo.com/quote/AAPL/history?period1=1534809600&period2=1566345600&interval=1d&filter=history&frequency=1d&_guc_consent_skip=1566859607')
data = json.loads(p.findall(r.text)[0])