解析雅虎财经 python httperror 502
parsing yahoo finance python httperror 502
我有以下代码用于解析 Yahoo Finance,以备份我在 S&P 500 上运行的代码。它在仅处理约 20 只股票后就因错误而停止:HTTP Error 502(服务器挂断)。有谁知道解析 Yahoo Finance 的更好方法,或者如何解决此问题吗?
# Back up the Yahoo Finance key-statistics, income-statement, balance-sheet
# and cash-flow pages for every ticker in `sp500`.
#
# Expects `sp500` (iterable of ticker strings), `location` (base output
# directory), `urllib2`, `os` and `datetime` to be available in the
# enclosing script.  Each page is written to
#   <location>/<folder>/<ticker>/<YYYYmmddHHMMSS>.html
import time

# Seconds to wait after each request.  Intended to lower the chance of the
# server answering with HTTP 502 when many pages are fetched back to back.
_REQUEST_PAUSE_SECONDS = 1

# (output sub-folder, Yahoo page code, extra query string) for each page.
# Folder names (including the "Annaul" spelling) are kept exactly as the
# script has always written them, so existing backups stay in place.
_PAGES = [
    ('_KeyStats', 'ks', ''),
    ('_AnnualEarnings', 'is', '&annual'),
    ('_QuarterlyEarnings', 'is', ''),
    ('_AnnaulBS', 'bs', '&annual'),
    ('_QuarterlyBS', 'bs', ''),
    ('_AnnaulCF', 'cf', '&annual'),
    ('_QuarterlyCF', 'cf', ''),
]

for stock in sp500:
    # Errors are handled per ticker so that one failed download does not
    # abandon the rest of the list.
    try:
        for folder, page, suffix in _PAGES:
            url = ('https://au.finance.yahoo.com/q/' + page
                   + '?s=' + stock + suffix)
            # Fetch first: if the request fails, no empty file is left behind.
            html = urllib2.urlopen(url).read()

            save_path = os.path.join(location, folder, stock)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            name_of_file = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            completeName = os.path.join(save_path, name_of_file + ".html")

            # Binary mode stores the response bytes unchanged; `with`
            # closes the handle even if the write raises.
            with open(completeName, "wb") as file1:
                file1.write(html)

            time.sleep(_REQUEST_PAUSE_SECONDS)
        print(stock)
    except Exception as e:
        print('failed main loop ' + stock + ' ' + str(e))
您应该为此使用 pandas。假设您有一个包含所有股票的文件:
sp500.txt
AAPL
GLD
SPX
MCD
现在你可以做:
from pandas.io.data import DataReader
from pandas import Panel, DataFrame
import datetime
# Date window for the price history request.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)

# sp500.txt holds one ticker symbol per line.
with open('sp500.txt') as ticker_file:
    symbols = ticker_file.read().splitlines()  # ['AAPL', 'GLD', 'SPX', 'MCD']

# Fetch one result per symbol; pause=1 is passed through to DataReader.
data = {
    symbol: DataReader(symbol, "yahoo", start, end, pause=1)
    for symbol in symbols
}

# Swap axes so that indexing by 'Close' yields one column per symbol,
# then drop rows that contain missing values.
panel = Panel(data).swapaxes('items', 'minor')
closing = panel['Close'].dropna()
print(closing.head())
输出:
AAPL GLD MCD SPX
Date
2010-01-04 214.01 109.80 62.78 0.03
2010-01-05 214.38 109.70 62.30 0.03
2010-01-06 210.97 111.51 61.45 0.03
2010-01-07 210.58 110.82 61.90 0.03
2010-01-08 211.98 111.37 61.84 0.04
注意 DataReader 调用中的 pause=1,它用于避免触及 API 限制。如果您想将结果保存到文件中,您可以使用:
closing.to_csv('output.csv')
我有以下代码用于解析 Yahoo Finance,以备份我在 S&P 500 上运行的代码。它在仅处理约 20 只股票后就因错误而停止:HTTP Error 502(服务器挂断)。有谁知道解析 Yahoo Finance 的更好方法,或者如何解决此问题吗?
# Back up the Yahoo Finance key-statistics, income-statement, balance-sheet
# and cash-flow pages for every ticker in `sp500`.
#
# Expects `sp500` (iterable of ticker strings), `location` (base output
# directory), `urllib2`, `os` and `datetime` to be available in the
# enclosing script.  Each page is written to
#   <location>/<folder>/<ticker>/<YYYYmmddHHMMSS>.html
import time

# Seconds to wait after each request.  Intended to lower the chance of the
# server answering with HTTP 502 when many pages are fetched back to back.
_REQUEST_PAUSE_SECONDS = 1

# (output sub-folder, Yahoo page code, extra query string) for each page.
# Folder names (including the "Annaul" spelling) are kept exactly as the
# script has always written them, so existing backups stay in place.
_PAGES = [
    ('_KeyStats', 'ks', ''),
    ('_AnnualEarnings', 'is', '&annual'),
    ('_QuarterlyEarnings', 'is', ''),
    ('_AnnaulBS', 'bs', '&annual'),
    ('_QuarterlyBS', 'bs', ''),
    ('_AnnaulCF', 'cf', '&annual'),
    ('_QuarterlyCF', 'cf', ''),
]

for stock in sp500:
    # Errors are handled per ticker so that one failed download does not
    # abandon the rest of the list.
    try:
        for folder, page, suffix in _PAGES:
            url = ('https://au.finance.yahoo.com/q/' + page
                   + '?s=' + stock + suffix)
            # Fetch first: if the request fails, no empty file is left behind.
            html = urllib2.urlopen(url).read()

            save_path = os.path.join(location, folder, stock)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            name_of_file = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            completeName = os.path.join(save_path, name_of_file + ".html")

            # Binary mode stores the response bytes unchanged; `with`
            # closes the handle even if the write raises.
            with open(completeName, "wb") as file1:
                file1.write(html)

            time.sleep(_REQUEST_PAUSE_SECONDS)
        print(stock)
    except Exception as e:
        print('failed main loop ' + stock + ' ' + str(e))
您应该为此使用 pandas。假设您有一个包含所有股票的文件:
sp500.txt
AAPL
GLD
SPX
MCD
现在你可以做:
from pandas.io.data import DataReader
from pandas import Panel, DataFrame
import datetime
# Date window for the price history request.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)

# sp500.txt holds one ticker symbol per line.
with open('sp500.txt') as ticker_file:
    symbols = ticker_file.read().splitlines()  # ['AAPL', 'GLD', 'SPX', 'MCD']

# Fetch one result per symbol; pause=1 is passed through to DataReader.
data = {
    symbol: DataReader(symbol, "yahoo", start, end, pause=1)
    for symbol in symbols
}

# Swap axes so that indexing by 'Close' yields one column per symbol,
# then drop rows that contain missing values.
panel = Panel(data).swapaxes('items', 'minor')
closing = panel['Close'].dropna()
print(closing.head())
输出:
AAPL GLD MCD SPX
Date
2010-01-04 214.01 109.80 62.78 0.03
2010-01-05 214.38 109.70 62.30 0.03
2010-01-06 210.97 111.51 61.45 0.03
2010-01-07 210.58 110.82 61.90 0.03
2010-01-08 211.98 111.37 61.84 0.04
注意 DataReader 调用中的 pause=1,它用于避免触及 API 限制。如果您想将结果保存到文件中,您可以使用:
closing.to_csv('output.csv')