雅虎财经历史数据下载器 url 无法正常工作
Yahoo Finance Historical data downloader url is not working
我使用了以下 url 从雅虎财经获取历史数据。从 2017 年 5 月 16 日起,url 无法正常工作。
他们似乎已经更改了 url,新的 url 是:
上面更改后的 URL 需要一个会话 cookie 以及与之对应的 crumb 参数。是否知道如何以编程方式获取此 cookie 和 crumb(在 Java 中)?
您可以在 Chrome 中手动保存 crumb/cookie 对,也可以使用脚本之类的工具来生成它。然后,只需在 Java 中设置 Cookie 请求头,并在 URL 中传递相应的 crumb。
成功了,现在我只需要解析 csv。我想分享一下,因为我在语法上遇到了麻烦。
' Crumb/cookie pair used for the request; "xxxx" and "yyyy" are presumably
' placeholders to be replaced with real values.
Dim crumb As String: crumb = "xxxx"
Dim cookie As String: cookie = "yyyy"
' Download URL for SIRI with fixed period1/period2 bounds and a daily interval;
' the crumb is appended as the last query parameter.
Dim urlStock As String: urlStock = "https://query1.finance.yahoo.com/v7/finance/download/SIRI?" & _
"period1=1274158800&" & _
"period2=1495059477&" & _
"interval=1d&events=history&crumb=" & crumb
' Synchronous GET (third argument of Open is False) that sends the cookie as a
' request header alongside the crumb carried in the URL.
Dim http As MSXML2.XMLHTTP: Set http = New MSXML2.ServerXMLHTTP
http.Open "GET", urlStock, False
http.setRequestHeader "Cookie", cookie
http.send
我最近写了一个简单的python脚本来下载单个股票的历史记录。
这里有一个如何调用它的例子:
python get_quote_history.py --symbol=IBM --from=2017-01-01 --to=2017-05-25 -o IBM.csv
这将下载 IBM 从 2017-01-01 到 2017-05-25 的历史价格并将它们保存在 IBM.csv 文件中。
import re
import urllib2
import calendar
import datetime
import getopt
import sys
import time
# History page URL; {0} is filled with the ticker symbol in both places.
crumble_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
# Captures the crumb value that follows 'CrumbStore":{"crumb":"' in the page text.
crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
# Captures the text between 'Set-Cookie: ' and the next '; ' in the header dump.
cookie_regex = r'Set-Cookie: (.*?); '
# Download URL; placeholders are symbol, period1, period2 and crumb, in that order.
quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events=history&crumb={}'
def get_crumble_and_cookie(symbol):
    """Fetch the crumb token and cookie needed to download ``symbol``.

    Opens the history page built from ``crumble_link`` and returns a
    ``(crumb, cookie)`` tuple: the cookie is the first ``cookie_regex`` match
    in the stringified response headers, the crumb is the first
    ``crumble_regex`` match in the page body.
    """
    page = urllib2.urlopen(crumble_link.format(symbol))
    headers_text = str(page.info())
    cookie_str = re.search(cookie_regex, headers_text).group(1)
    body = page.read()
    crumble_str = re.search(crumble_regex, body).group(1)
    return crumble_str, cookie_str
def download_quote(symbol, date_from, date_to):
    """Download the daily price history for ``symbol``.

    ``date_from`` and ``date_to`` are ``YYYY-MM-DD`` strings that are turned
    into epoch seconds with ``calendar.timegm``. Up to five attempts are made,
    each with a freshly fetched crumb/cookie pair, sleeping ``2 * attempt``
    seconds after every failed one. Returns the response body, or ``""`` when
    every attempt ended in ``urllib2.URLError``.
    """
    date_format = "%Y-%m-%d"
    start = datetime.datetime.strptime(date_from, date_format)
    time_stamp_from = calendar.timegm(start.timetuple())
    end = datetime.datetime.strptime(date_to, date_format)
    time_stamp_to = calendar.timegm(end.timetuple())

    for attempt in range(5):
        crumble_str, cookie_str = get_crumble_and_cookie(symbol)
        link = quote_link.format(symbol, time_stamp_from, time_stamp_to, crumble_str)
        request = urllib2.Request(link, headers={'Cookie': cookie_str})
        try:
            payload = urllib2.urlopen(request).read()
        except urllib2.URLError:
            # The message reports the zero-based attempt index; the back-off
            # uses the one-based count.
            print("{} failed at attempt # {}".format(symbol, attempt))
            time.sleep(2 * (attempt + 1))
        else:
            print("{} downloaded".format(symbol))
            return payload
    return ""
if __name__ == '__main__':
    # Command-line entry point:
    #   --symbol=SYM --from=YYYY-MM-DD --to=YYYY-MM-DD -o OUTPUT_FILE
    from_arg = "from"
    to_arg = "to"
    symbol_arg = "symbol"
    output_arg = "o"
    opt_list = (from_arg+"=", to_arg+"=", symbol_arg+"=")
    try:
        options, args = getopt.getopt(sys.argv[1:], output_arg+":", opt_list)
    except getopt.GetoptError as err:
        # `options` is unbound after a parse failure, so stop here rather
        # than fall through to a NameError.
        print(err)
        sys.exit(2)

    # Defaults let a missing option be reported instead of raising NameError.
    from_val = ""
    to_val = ""
    symbol_val = ""
    output_val = ""
    for opt, value in options:
        if opt == "--" + from_arg:
            from_val = value
        elif opt == "--" + to_arg:
            to_val = value
        elif opt == "--" + symbol_arg:
            symbol_val = value
        elif opt == "-" + output_arg:
            output_val = value

    required = (
        ("--" + symbol_arg, symbol_val),
        ("--" + from_arg, from_val),
        ("--" + to_arg, to_val),
        ("-" + output_arg, output_val),
    )
    missing = [flag for flag, val in required if not val]
    if missing:
        print("missing required option(s): {}".format(", ".join(missing)))
        sys.exit(2)

    print("downloading {}".format(symbol_val))
    text = download_quote(symbol_val, from_val, to_val)
    if text:
        with open(output_val, 'wb') as f:
            f.write(text)
        print("{} written to {}".format(symbol_val, output_val))
    else:
        # download_quote returns an empty string once all retries failed;
        # do not create an empty output file in that case.
        print("{} could not be downloaded".format(symbol_val))
        sys.exit(1)
Andrea Galeazzi 的回答非常出色;我增加了拆分(split)和股息(dividend)的选项,并将代码改写为适用于 Python 3。
也进行了更改,因此 "to:date" 包含在返回的结果中,以前的代码返回但不包括 "to:date"。就是不一样!
请注意,雅虎对价格四舍五入、列顺序和拆分语法进行了细微更改。
## Downloaded from
##
## Modified for Python 3
## Added --event=history|div|split default = history
## changed so "to:date" is included in the returned results
## usage: download_quote(symbol, date_from, date_to, events).decode('utf-8')
import re
from urllib.request import urlopen, Request, URLError
import calendar
import datetime
import getopt
import sys
import time
# History page URL; {0} is filled with the ticker symbol in both places.
crumble_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
# Captures the crumb value that follows 'CrumbStore":{"crumb":"' in the page text.
crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
# Captures the text between 'Set-Cookie: ' and the next '; ' in the header dump.
cookie_regex = r'Set-Cookie: (.*?); '
# Download URL; placeholders are symbol, period1, period2, events and crumb, in that order.
quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events={}&crumb={}'
def get_crumble_and_cookie(symbol):
    """Fetch the crumb token and cookie needed to download ``symbol``.

    Opens the history page built from ``crumble_link`` and extracts the
    cookie from the response headers (``cookie_regex``) and the crumb from
    the page body (``crumble_regex``).

    Returns:
        A ``(crumb, cookie)`` tuple of strings.

    Raises:
        ValueError: if the cookie or the crumb cannot be found in the response.
    """
    import json  # local import: only needed to decode the crumb's JSON escapes

    # The context manager closes the connection even if reading fails.
    with urlopen(crumble_link.format(symbol)) as response:
        headers_text = str(response.info())
        text = response.read().decode("utf-8")

    cookie_match = re.search(cookie_regex, headers_text)
    if cookie_match is None:
        raise ValueError("no cookie found in the response for {}".format(symbol))
    crumb_match = re.search(crumble_regex, text)
    if crumb_match is None:
        raise ValueError("no crumb found in the page for {}".format(symbol))

    # The crumb is cut out of JSON text, so escapes such as \u002F are still
    # literal at this point; decode it as the JSON string it is.
    crumble_str = json.loads('"{}"'.format(crumb_match.group(1)))
    return crumble_str, cookie_match.group(1)
def download_quote(symbol, date_from, date_to, events):
    """Download daily ``events`` data for ``symbol`` between two dates.

    Args:
        symbol: ticker symbol inserted into the download URL.
        date_from: first day, as a ``YYYY-MM-DD`` string.
        date_to: last day, as a ``YYYY-MM-DD`` string; it is included in the
            requested range.
        events: value of the ``events`` query parameter.

    Returns:
        The raw response body as bytes, or ``b''`` when all five attempts
        failed with ``URLError``.

    Raises:
        ValueError: if a date string does not match ``YYYY-MM-DD``.
    """
    max_attempts = 5
    date_format = "%Y-%m-%d"
    first_day = datetime.datetime.strptime(date_from, date_format)
    time_stamp_from = calendar.timegm(first_day.timetuple())
    # Push the upper bound forward one day so that date_to itself is part of
    # the returned rows.
    next_day = datetime.datetime.strptime(date_to, date_format) + datetime.timedelta(days=1)
    time_stamp_to = calendar.timegm(next_day.timetuple())

    for attempt in range(max_attempts):
        try:
            # The crumb/cookie fetch is a network call too, so it lives inside
            # the try block and a transient failure there is retried as well.
            crumble_str, cookie_str = get_crumble_and_cookie(symbol)
            link = quote_link.format(symbol, time_stamp_from, time_stamp_to, events, crumble_str)
            request = Request(link, headers={'Cookie': cookie_str})
            with urlopen(request) as response:
                text = response.read()
        except URLError:
            print("{} failed at attempt # {}".format(symbol, attempt))
            # Back off before the next try; pointless after the last one.
            if attempt + 1 < max_attempts:
                time.sleep(2 * (attempt + 1))
        else:
            print("{} downloaded".format(symbol))
            return text
    return b''
if __name__ == '__main__':
    # Command-line entry point:
    #   --symbol=SYM --from=YYYY-MM-DD --to=YYYY-MM-DD [--event=EVENT] -o OUTPUT_FILE
    from_arg = "from"
    to_arg = "to"
    symbol_arg = "symbol"
    event_arg = "event"
    output_arg = "o"
    opt_list = (from_arg+"=", to_arg+"=", symbol_arg+"=", event_arg+"=")
    try:
        options, args = getopt.getopt(sys.argv[1:], output_arg+":", opt_list)
    except getopt.GetoptError as err:
        # `options` is unbound after a parse failure, so stop here rather
        # than fall through to a NameError.
        print(err)
        sys.exit(2)

    symbol_val = ""
    from_val = ""
    to_val = ""
    output_val = ""
    event_val = "history"
    for opt, value in options:
        if opt == "--" + from_arg:
            from_val = value
        elif opt == "--" + to_arg:
            to_val = value
        elif opt == "--" + symbol_arg:
            symbol_val = value
        elif opt == "--" + event_arg:
            event_val = value
        elif opt == "-" + output_arg:
            output_val = value

    # Report absent options up front instead of failing later inside date
    # parsing or open().
    required = (
        ("--" + symbol_arg, symbol_val),
        ("--" + from_arg, from_val),
        ("--" + to_arg, to_val),
        ("-" + output_arg, output_val),
    )
    missing = [flag for flag, val in required if not val]
    if missing:
        print("missing required option(s): {}".format(", ".join(missing)))
        sys.exit(2)

    print("downloading {}".format(symbol_val))
    text = download_quote(symbol_val, from_val, to_val, event_val)
    if text:
        with open(output_val, 'wb') as f:
            f.write(text)
        print("{} written to {}".format(symbol_val, output_val))
    else:
        # download_quote returns empty bytes once all retries failed.
        print("{} could not be downloaded".format(symbol_val))
        sys.exit(1)
我在 Excel/VBA 中针对此问题开发了以下解决方案。关键挑战是创建 Crumb / Cookie 对。创建后,您可以重新使用它来调用 Yahoo 以获取历史价格。
在此处查看 Crumb / Cookie 的关键代码
Sub GetYahooRequest(strCrumb As String, strCookie As String)
    ' Sends a sample GET request to Yahoo and hands back, through the two
    ' arguments, the crumb extracted from the response body and the first
    ' ";"-separated segment of the Set-Cookie response header.
    Const LOOKUP_URL As String = "https://finance.yahoo.com/lookup?s=%7B0%7D"
    Dim httpReq As WinHttp.WinHttpRequest
    Dim cookieHeader As String

    Set httpReq = New WinHttp.WinHttpRequest

    ' Open asynchronously, then block until the response has arrived.
    httpReq.Open "GET", LOOKUP_URL, True
    httpReq.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
    httpReq.send
    httpReq.waitForResponse

    strCrumb = strExtractCrumb(httpReq.responseText)
    cookieHeader = httpReq.getResponseHeader("Set-Cookie")
    strCookie = Split(cookieHeader, ";")(0)
End Sub
请参阅我网站上的以下 Yahoo Historical Price Extract 示例 Excel 工作簿,该工作簿演示了如何提取 Yahoo 历史价格
Andrea 回答得好,我已将代码添加到您的代码中以允许下载多只股票。 (python 2.7)
文件 1: down.py
import os
import subprocess


def main():
    """Run get_quote_history.py once for every ticker listed in ticker.csv.

    Each non-blank line of ticker.csv is treated as one ticker symbol; the
    history for the fixed range 2017-01-01 .. 2017-05-25 is written to
    ``<ticker>.csv``.
    """
    # The with-block closes the ticker file once every line has been handled.
    with open("ticker.csv", "r") as ticker_file:
        for line in ticker_file:
            ticker = line.strip()
            if not ticker:
                # A blank line would otherwise start a download with an empty symbol.
                continue
            # Pass an argument list (no shell) so the file's contents can
            # never be interpreted as shell syntax.
            subprocess.call([
                "python", "get_quote_history.py",
                "--symbol=%s" % ticker,
                "--from=2017-01-01",
                "--to=2017-05-25",
                "-o", "%s.csv" % ticker,
            ])


if __name__ == "__main__":
    main()
文件 2:ticker.csv(每行一个股票代码,例如 MSFT;下面的示例内容在翻译时被误译成了公司名称)
美国航空航天局
微软
文件 3:get_quote_history.py
我编写了一个轻量级脚本,该脚本汇集了此线程中的许多建议来解决此问题。 https://github.com/AndrewRPorter/yahoo-historical
但是,还有更好的解决方案,例如 https://github.com/ranaroussi/fix-yahoo-finance
希望这些资源对您有所帮助!
我使用了以下 url 从雅虎财经获取历史数据。从 2017 年 5 月 16 日起,url 无法正常工作。
他们似乎已经更改了 url,新的 url 是:
上面更改后的 URL 需要一个会话 cookie 以及与之对应的 crumb 参数。是否知道如何以编程方式获取此 cookie 和 crumb(在 Java 中)?
您可以在 Chrome 中手动保存 crumb/cookie 对,也可以使用脚本之类的工具来生成它。然后,只需在 Java 中设置 Cookie 请求头,并在 URL 中传递相应的 crumb。
成功了,现在我只需要解析 csv。我想分享一下,因为我在语法上遇到了麻烦。
' Crumb/cookie pair used for the request; "xxxx" and "yyyy" are presumably
' placeholders to be replaced with real values.
Dim crumb As String: crumb = "xxxx"
Dim cookie As String: cookie = "yyyy"
' Download URL for SIRI with fixed period1/period2 bounds and a daily interval;
' the crumb is appended as the last query parameter.
Dim urlStock As String: urlStock = "https://query1.finance.yahoo.com/v7/finance/download/SIRI?" & _
"period1=1274158800&" & _
"period2=1495059477&" & _
"interval=1d&events=history&crumb=" & crumb
' Synchronous GET (third argument of Open is False) that sends the cookie as a
' request header alongside the crumb carried in the URL.
Dim http As MSXML2.XMLHTTP: Set http = New MSXML2.ServerXMLHTTP
http.Open "GET", urlStock, False
http.setRequestHeader "Cookie", cookie
http.send
我最近写了一个简单的python脚本来下载单个股票的历史记录。
这里有一个如何调用它的例子:
python get_quote_history.py --symbol=IBM --from=2017-01-01 --to=2017-05-25 -o IBM.csv
这将下载 IBM 从 2017-01-01 到 2017-05-25 的历史价格并将它们保存在 IBM.csv 文件中。
import re
import urllib2
import calendar
import datetime
import getopt
import sys
import time
# History page URL; {0} is filled with the ticker symbol in both places.
crumble_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
# Captures the crumb value that follows 'CrumbStore":{"crumb":"' in the page text.
crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
# Captures the text between 'Set-Cookie: ' and the next '; ' in the header dump.
cookie_regex = r'Set-Cookie: (.*?); '
# Download URL; placeholders are symbol, period1, period2 and crumb, in that order.
quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events=history&crumb={}'
def get_crumble_and_cookie(symbol):
    """Fetch the crumb token and cookie needed to download ``symbol``.

    Opens the history page built from ``crumble_link`` and returns a
    ``(crumb, cookie)`` tuple: the cookie is the first ``cookie_regex`` match
    in the stringified response headers, the crumb is the first
    ``crumble_regex`` match in the page body.
    """
    page = urllib2.urlopen(crumble_link.format(symbol))
    headers_text = str(page.info())
    cookie_str = re.search(cookie_regex, headers_text).group(1)
    body = page.read()
    crumble_str = re.search(crumble_regex, body).group(1)
    return crumble_str, cookie_str
def download_quote(symbol, date_from, date_to):
    """Download the daily price history for ``symbol``.

    ``date_from`` and ``date_to`` are ``YYYY-MM-DD`` strings that are turned
    into epoch seconds with ``calendar.timegm``. Up to five attempts are made,
    each with a freshly fetched crumb/cookie pair, sleeping ``2 * attempt``
    seconds after every failed one. Returns the response body, or ``""`` when
    every attempt ended in ``urllib2.URLError``.
    """
    date_format = "%Y-%m-%d"
    start = datetime.datetime.strptime(date_from, date_format)
    time_stamp_from = calendar.timegm(start.timetuple())
    end = datetime.datetime.strptime(date_to, date_format)
    time_stamp_to = calendar.timegm(end.timetuple())

    for attempt in range(5):
        crumble_str, cookie_str = get_crumble_and_cookie(symbol)
        link = quote_link.format(symbol, time_stamp_from, time_stamp_to, crumble_str)
        request = urllib2.Request(link, headers={'Cookie': cookie_str})
        try:
            payload = urllib2.urlopen(request).read()
        except urllib2.URLError:
            # The message reports the zero-based attempt index; the back-off
            # uses the one-based count.
            print("{} failed at attempt # {}".format(symbol, attempt))
            time.sleep(2 * (attempt + 1))
        else:
            print("{} downloaded".format(symbol))
            return payload
    return ""
if __name__ == '__main__':
    # Command-line entry point:
    #   --symbol=SYM --from=YYYY-MM-DD --to=YYYY-MM-DD -o OUTPUT_FILE
    from_arg = "from"
    to_arg = "to"
    symbol_arg = "symbol"
    output_arg = "o"
    opt_list = (from_arg+"=", to_arg+"=", symbol_arg+"=")
    try:
        options, args = getopt.getopt(sys.argv[1:], output_arg+":", opt_list)
    except getopt.GetoptError as err:
        # `options` is unbound after a parse failure, so stop here rather
        # than fall through to a NameError.
        print(err)
        sys.exit(2)

    # Defaults let a missing option be reported instead of raising NameError.
    from_val = ""
    to_val = ""
    symbol_val = ""
    output_val = ""
    for opt, value in options:
        if opt == "--" + from_arg:
            from_val = value
        elif opt == "--" + to_arg:
            to_val = value
        elif opt == "--" + symbol_arg:
            symbol_val = value
        elif opt == "-" + output_arg:
            output_val = value

    required = (
        ("--" + symbol_arg, symbol_val),
        ("--" + from_arg, from_val),
        ("--" + to_arg, to_val),
        ("-" + output_arg, output_val),
    )
    missing = [flag for flag, val in required if not val]
    if missing:
        print("missing required option(s): {}".format(", ".join(missing)))
        sys.exit(2)

    print("downloading {}".format(symbol_val))
    text = download_quote(symbol_val, from_val, to_val)
    if text:
        with open(output_val, 'wb') as f:
            f.write(text)
        print("{} written to {}".format(symbol_val, output_val))
    else:
        # download_quote returns an empty string once all retries failed;
        # do not create an empty output file in that case.
        print("{} could not be downloaded".format(symbol_val))
        sys.exit(1)
Andrea Galeazzi 的回答非常出色;我增加了拆分(split)和股息(dividend)的选项,并将代码改写为适用于 Python 3。
也进行了更改,因此 "to:date" 包含在返回的结果中,以前的代码返回但不包括 "to:date"。就是不一样!
请注意,雅虎对价格四舍五入、列顺序和拆分语法进行了细微更改。
## Downloaded from
##
## Modified for Python 3
## Added --event=history|div|split default = history
## changed so "to:date" is included in the returned results
## usage: download_quote(symbol, date_from, date_to, events).decode('utf-8')
import re
from urllib.request import urlopen, Request, URLError
import calendar
import datetime
import getopt
import sys
import time
# History page URL; {0} is filled with the ticker symbol in both places.
crumble_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
# Captures the crumb value that follows 'CrumbStore":{"crumb":"' in the page text.
crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
# Captures the text between 'Set-Cookie: ' and the next '; ' in the header dump.
cookie_regex = r'Set-Cookie: (.*?); '
# Download URL; placeholders are symbol, period1, period2, events and crumb, in that order.
quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events={}&crumb={}'
def get_crumble_and_cookie(symbol):
    """Fetch the crumb token and cookie needed to download ``symbol``.

    Opens the history page built from ``crumble_link`` and extracts the
    cookie from the response headers (``cookie_regex``) and the crumb from
    the page body (``crumble_regex``).

    Returns:
        A ``(crumb, cookie)`` tuple of strings.

    Raises:
        ValueError: if the cookie or the crumb cannot be found in the response.
    """
    import json  # local import: only needed to decode the crumb's JSON escapes

    # The context manager closes the connection even if reading fails.
    with urlopen(crumble_link.format(symbol)) as response:
        headers_text = str(response.info())
        text = response.read().decode("utf-8")

    cookie_match = re.search(cookie_regex, headers_text)
    if cookie_match is None:
        raise ValueError("no cookie found in the response for {}".format(symbol))
    crumb_match = re.search(crumble_regex, text)
    if crumb_match is None:
        raise ValueError("no crumb found in the page for {}".format(symbol))

    # The crumb is cut out of JSON text, so escapes such as \u002F are still
    # literal at this point; decode it as the JSON string it is.
    crumble_str = json.loads('"{}"'.format(crumb_match.group(1)))
    return crumble_str, cookie_match.group(1)
def download_quote(symbol, date_from, date_to, events):
    """Download daily ``events`` data for ``symbol`` between two dates.

    Args:
        symbol: ticker symbol inserted into the download URL.
        date_from: first day, as a ``YYYY-MM-DD`` string.
        date_to: last day, as a ``YYYY-MM-DD`` string; it is included in the
            requested range.
        events: value of the ``events`` query parameter.

    Returns:
        The raw response body as bytes, or ``b''`` when all five attempts
        failed with ``URLError``.

    Raises:
        ValueError: if a date string does not match ``YYYY-MM-DD``.
    """
    max_attempts = 5
    date_format = "%Y-%m-%d"
    first_day = datetime.datetime.strptime(date_from, date_format)
    time_stamp_from = calendar.timegm(first_day.timetuple())
    # Push the upper bound forward one day so that date_to itself is part of
    # the returned rows.
    next_day = datetime.datetime.strptime(date_to, date_format) + datetime.timedelta(days=1)
    time_stamp_to = calendar.timegm(next_day.timetuple())

    for attempt in range(max_attempts):
        try:
            # The crumb/cookie fetch is a network call too, so it lives inside
            # the try block and a transient failure there is retried as well.
            crumble_str, cookie_str = get_crumble_and_cookie(symbol)
            link = quote_link.format(symbol, time_stamp_from, time_stamp_to, events, crumble_str)
            request = Request(link, headers={'Cookie': cookie_str})
            with urlopen(request) as response:
                text = response.read()
        except URLError:
            print("{} failed at attempt # {}".format(symbol, attempt))
            # Back off before the next try; pointless after the last one.
            if attempt + 1 < max_attempts:
                time.sleep(2 * (attempt + 1))
        else:
            print("{} downloaded".format(symbol))
            return text
    return b''
if __name__ == '__main__':
    # Command-line entry point:
    #   --symbol=SYM --from=YYYY-MM-DD --to=YYYY-MM-DD [--event=EVENT] -o OUTPUT_FILE
    from_arg = "from"
    to_arg = "to"
    symbol_arg = "symbol"
    event_arg = "event"
    output_arg = "o"
    opt_list = (from_arg+"=", to_arg+"=", symbol_arg+"=", event_arg+"=")
    try:
        options, args = getopt.getopt(sys.argv[1:], output_arg+":", opt_list)
    except getopt.GetoptError as err:
        # `options` is unbound after a parse failure, so stop here rather
        # than fall through to a NameError.
        print(err)
        sys.exit(2)

    symbol_val = ""
    from_val = ""
    to_val = ""
    output_val = ""
    event_val = "history"
    for opt, value in options:
        if opt == "--" + from_arg:
            from_val = value
        elif opt == "--" + to_arg:
            to_val = value
        elif opt == "--" + symbol_arg:
            symbol_val = value
        elif opt == "--" + event_arg:
            event_val = value
        elif opt == "-" + output_arg:
            output_val = value

    # Report absent options up front instead of failing later inside date
    # parsing or open().
    required = (
        ("--" + symbol_arg, symbol_val),
        ("--" + from_arg, from_val),
        ("--" + to_arg, to_val),
        ("-" + output_arg, output_val),
    )
    missing = [flag for flag, val in required if not val]
    if missing:
        print("missing required option(s): {}".format(", ".join(missing)))
        sys.exit(2)

    print("downloading {}".format(symbol_val))
    text = download_quote(symbol_val, from_val, to_val, event_val)
    if text:
        with open(output_val, 'wb') as f:
            f.write(text)
        print("{} written to {}".format(symbol_val, output_val))
    else:
        # download_quote returns empty bytes once all retries failed.
        print("{} could not be downloaded".format(symbol_val))
        sys.exit(1)
我在 Excel/VBA 中针对此问题开发了以下解决方案。关键挑战是创建 Crumb / Cookie 对。创建后,您可以重新使用它来调用 Yahoo 以获取历史价格。
在此处查看 Crumb / Cookie 的关键代码
Sub GetYahooRequest(strCrumb As String, strCookie As String)
    ' Sends a sample GET request to Yahoo and hands back, through the two
    ' arguments, the crumb extracted from the response body and the first
    ' ";"-separated segment of the Set-Cookie response header.
    Const LOOKUP_URL As String = "https://finance.yahoo.com/lookup?s=%7B0%7D"
    Dim httpReq As WinHttp.WinHttpRequest
    Dim cookieHeader As String

    Set httpReq = New WinHttp.WinHttpRequest

    ' Open asynchronously, then block until the response has arrived.
    httpReq.Open "GET", LOOKUP_URL, True
    httpReq.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
    httpReq.send
    httpReq.waitForResponse

    strCrumb = strExtractCrumb(httpReq.responseText)
    cookieHeader = httpReq.getResponseHeader("Set-Cookie")
    strCookie = Split(cookieHeader, ";")(0)
End Sub
请参阅我网站上的以下 Yahoo Historical Price Extract 示例 Excel 工作簿,该工作簿演示了如何提取 Yahoo 历史价格
Andrea 回答得好,我已将代码添加到您的代码中以允许下载多只股票。 (python 2.7)
文件 1: down.py
import os
import subprocess


def main():
    """Run get_quote_history.py once for every ticker listed in ticker.csv.

    Each non-blank line of ticker.csv is treated as one ticker symbol; the
    history for the fixed range 2017-01-01 .. 2017-05-25 is written to
    ``<ticker>.csv``.
    """
    # The with-block closes the ticker file once every line has been handled.
    with open("ticker.csv", "r") as ticker_file:
        for line in ticker_file:
            ticker = line.strip()
            if not ticker:
                # A blank line would otherwise start a download with an empty symbol.
                continue
            # Pass an argument list (no shell) so the file's contents can
            # never be interpreted as shell syntax.
            subprocess.call([
                "python", "get_quote_history.py",
                "--symbol=%s" % ticker,
                "--from=2017-01-01",
                "--to=2017-05-25",
                "-o", "%s.csv" % ticker,
            ])


if __name__ == "__main__":
    main()
文件 2:ticker.csv(每行一个股票代码,例如 MSFT;以下示例内容在翻译时被误译成了公司名称) 美国航空航天局 微软
文件 3:get_quote_history.py
我编写了一个轻量级脚本,该脚本汇集了此线程中的许多建议来解决此问题。 https://github.com/AndrewRPorter/yahoo-historical
但是,还有更好的解决方案,例如 https://github.com/ranaroussi/fix-yahoo-finance
希望这些资源对您有所帮助!