是否可以使用多年的数据框计算年度 beta、alpha 和 rsquared?
Is it possible to calculate yearly beta, alpha and rsquared with a dataframe of multiple years?
我正在做下面这个项目,但遇到了一个问题:我有 6 年的数据,希望分别计算每一年的 beta、alpha 和 rsquared,而目前的代码只能针对全部 6 年的数据各算出一个值。
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas_datareader import data as pdr
import datetime
import yfinance as yf
import statsmodels.api as sm
from statsmodels import regression
yf.pdr_override()
# Ticker symbols to analyse, one per line.
# "BSV" is intentionally absent: Yahoo Finance has no data for it in the
# requested time period.
cryptos = [
    "BTC",
    "ETH",
    "XRP",
    "BCH",
    "USDT",
    "LTC",
    "BNB",
    "EOS",
    "LINK",
    "XMR",
    "BTG",
    "ADA",
    "XLM",
    "TRX",
    "USDC",
]
def get_and_process_data(c, start="2015-01-01", end="2021-01-01"):
    """Download the price history of one cryptocurrency and return its returns.

    Args:
        c: Ticker symbol without the quote currency (e.g. ``"BTC"``);
            ``"-USD"`` is appended before the lookup.
        start: Start date forwarded to the downloader. Defaults to the
            value that used to be hard-coded.
        end: End date forwarded to the downloader. Defaults to the value
            that used to be hard-coded.

    Returns:
        Series of period-over-period percentage changes of the ``Close``
        column, without the first row (it has no previous close, so its
        change is NaN).
    """
    raw_data = pdr.get_data_yahoo(c + '-USD', start=start, end=end)
    # .iloc makes the "drop the first row" slice explicitly positional.
    return raw_data['Close'].pct_change().iloc[1:]
# One column of returns per ticker; pandas aligns the series on their index.
df = pd.DataFrame(dict(zip(cryptos, map(get_and_process_data, cryptos))))
# Market proxy: row-wise average of all coin returns.
df['MKT Return'] = df.mean(axis=1)
# Show the complete data set.
print(df)
def model(x, y):
    """Regress ``y`` on ``x`` with an intercept and return the fit statistics.

    Args:
        x: Explanatory values (the market return in this script).
        y: Dependent values (one coin's return in this script).

    Returns:
        Tuple ``(beta, alpha, rsquared)``: slope, intercept and R-squared
        of the ordinary-least-squares fit.
    """
    # add_constant prepends the intercept column, so the fitted parameters
    # are ordered (intercept, slope).
    X = sm.add_constant(x)
    # A single fit is enough: the previous second fit used the same
    # estimator on the same data and produced identical numbers.
    fit = sm.OLS(y, X).fit()
    # Read the parameters by position from a plain array; integer keys on
    # a label-indexed Series are a deprecated positional fallback.
    params = np.asarray(fit.params)
    alpha = params[0]
    beta = params[1]
    return beta, alpha, fit.rsquared
# Fit every coin against the market proxy, using only the rows on which
# that coin actually has a return.
fits = {}
for c in cryptos:
    observed = df[df[c].notnull()]  # filter once, reuse for both series
    fits[c] = model(observed['MKT Return'], observed[c])
results = pd.DataFrame.from_dict(fits, orient='index', columns=['beta', 'alpha', 'rsquared'])
# change format of each statistic
results['alpha'] = results['alpha'].round(decimals=4)
results['beta'] = results['beta'].round(decimals=2)
# Scale to percent first and round to a whole number before the int cast.
# Casting round(2) * 100 directly truncated values such as
# 0.29 * 100 == 28.999999999999996 to "28%".
results['rsquared'] = results['rsquared'].mul(100).round().astype(int).astype(str).add('%')
# rename index of statistics crypto labels
results = results.rename(index={
    'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
    'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero',
    'BTG': 'Bitcoin Gold', 'ADA': 'Cardano', 'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin',
})
# print all results
print(results)
# Ticker -> display name; the display names are the row labels of `results`.
names_by_ticker = {
    'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
    'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero',
    'BTG': 'Bitcoin Gold', 'ADA': 'Cardano', 'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin',
}
# Prompt repeatedly for a coin (ticker or full name) and print its statistics.
while True:
    try:
        crypto = input("Cryptocurrency: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C ends the session. A blanket
        # `except Exception` here would catch EOFError on every iteration
        # and spin forever printing it.
        break
    # Tickers are matched case-insensitively; anything else is tried as a
    # full name exactly as typed.
    crypto = names_by_ticker.get(crypto.upper(), crypto)
    if crypto in ("quit", "stop", "exit"):
        break
    elif crypto in results.index:
        # Look the row up by label instead of a hard-coded row position.
        print(results.loc[crypto])
    else:
        print("No Available Data")
我如何创建一个或多个数据框,使其包含每种加密货币(比特币、以太坊、瑞波币……)在每一年(2015 年、2016 年、2017 年……)的 beta、alpha 和 rsquared 统计数据?
此外,是否可以简化 while True: 部分?它的用途是接收用户输入:您可以键入加密货币名称的任一写法(代号或全称),代码会输出对应的统计数据,并在需要时让您继续选择其他加密货币——目前是用 if、elif 以及 try、except 实现的,如上面的代码所示。
您可以将结果部分包装在一个函数中,然后使用过滤后的数据集调用它。例如,这将为您提供 2020 年的数据。
# Select the 2020 rows by comparing the index year directly rather than
# substring-matching stringified labels.
# Assumes df has a DatetimeIndex, as downloaded price data normally does.
df_2020 = df[df.index.year == 2020]
您可以根据 df_2020 而不是整个 df 数据集来计算结果。要遍历它们,也许您可以创建一系列切片:
# One slice of df per calendar year, 2015 through 2020, selected by the
# index year rather than by substring-matching stringified labels.
# Assumes df has a DatetimeIndex.
df_list = [df[df.index.year == year] for year in range(2015, 2021)]
您的 while 循环可以简单得多。下面是一个可行的思路。请注意,您可以通过名称调用 results.loc[crypto],而不必使用 .iloc 把行号硬编码进去。
# Ticker -> display name; the display names are the row labels of `results`.
abbrev = {
    'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
    'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero',
    'BTG': 'Bitcoin Gold', 'ADA': 'Cardano', 'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin',
}
# Prompt repeatedly for a coin and print its statistics until the user quits.
while True:
    try:
        crypto = input("Cryptocurrency: ").strip()
    except EOFError:
        # stdin was closed: leave the loop instead of crashing.
        break
    # If a ticker was given (in any letter case), translate it to the full
    # name; otherwise keep the input as typed.
    crypto = abbrev.get(crypto.upper(), crypto)
    if crypto in results.index:
        print(results.loc[crypto])
    elif crypto in ("quit", "stop", "exit"):
        break
    else:
        print("No Available Data")
我正在做下面这个项目,但遇到了一个问题:我有 6 年的数据,希望分别计算每一年的 beta、alpha 和 rsquared,而目前的代码只能针对全部 6 年的数据各算出一个值。
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas_datareader import data as pdr
import datetime
import yfinance as yf
import statsmodels.api as sm
from statsmodels import regression
yf.pdr_override()
# Ticker symbols to analyse, one per line.
# "BSV" is intentionally absent: Yahoo Finance has no data for it in the
# requested time period.
cryptos = [
    "BTC",
    "ETH",
    "XRP",
    "BCH",
    "USDT",
    "LTC",
    "BNB",
    "EOS",
    "LINK",
    "XMR",
    "BTG",
    "ADA",
    "XLM",
    "TRX",
    "USDC",
]
def get_and_process_data(c, start="2015-01-01", end="2021-01-01"):
    """Download the price history of one cryptocurrency and return its returns.

    Args:
        c: Ticker symbol without the quote currency (e.g. ``"BTC"``);
            ``"-USD"`` is appended before the lookup.
        start: Start date forwarded to the downloader. Defaults to the
            value that used to be hard-coded.
        end: End date forwarded to the downloader. Defaults to the value
            that used to be hard-coded.

    Returns:
        Series of period-over-period percentage changes of the ``Close``
        column, without the first row (it has no previous close, so its
        change is NaN).
    """
    raw_data = pdr.get_data_yahoo(c + '-USD', start=start, end=end)
    # .iloc makes the "drop the first row" slice explicitly positional.
    return raw_data['Close'].pct_change().iloc[1:]
# One column of returns per ticker; pandas aligns the series on their index.
df = pd.DataFrame(dict(zip(cryptos, map(get_and_process_data, cryptos))))
# Market proxy: row-wise average of all coin returns.
df['MKT Return'] = df.mean(axis=1)
# Show the complete data set.
print(df)
def model(x, y):
    """Regress ``y`` on ``x`` with an intercept and return the fit statistics.

    Args:
        x: Explanatory values (the market return in this script).
        y: Dependent values (one coin's return in this script).

    Returns:
        Tuple ``(beta, alpha, rsquared)``: slope, intercept and R-squared
        of the ordinary-least-squares fit.
    """
    # add_constant prepends the intercept column, so the fitted parameters
    # are ordered (intercept, slope).
    X = sm.add_constant(x)
    # A single fit is enough: the previous second fit used the same
    # estimator on the same data and produced identical numbers.
    fit = sm.OLS(y, X).fit()
    # Read the parameters by position from a plain array; integer keys on
    # a label-indexed Series are a deprecated positional fallback.
    params = np.asarray(fit.params)
    alpha = params[0]
    beta = params[1]
    return beta, alpha, fit.rsquared
# Fit every coin against the market proxy, using only the rows on which
# that coin actually has a return.
fits = {}
for c in cryptos:
    observed = df[df[c].notnull()]  # filter once, reuse for both series
    fits[c] = model(observed['MKT Return'], observed[c])
results = pd.DataFrame.from_dict(fits, orient='index', columns=['beta', 'alpha', 'rsquared'])
# change format of each statistic
results['alpha'] = results['alpha'].round(decimals=4)
results['beta'] = results['beta'].round(decimals=2)
# Scale to percent first and round to a whole number before the int cast.
# Casting round(2) * 100 directly truncated values such as
# 0.29 * 100 == 28.999999999999996 to "28%".
results['rsquared'] = results['rsquared'].mul(100).round().astype(int).astype(str).add('%')
# rename index of statistics crypto labels
results = results.rename(index={
    'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
    'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero',
    'BTG': 'Bitcoin Gold', 'ADA': 'Cardano', 'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin',
})
# print all results
print(results)
# Ticker -> display name; the display names are the row labels of `results`.
names_by_ticker = {
    'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
    'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero',
    'BTG': 'Bitcoin Gold', 'ADA': 'Cardano', 'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin',
}
# Prompt repeatedly for a coin (ticker or full name) and print its statistics.
while True:
    try:
        crypto = input("Cryptocurrency: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C ends the session. A blanket
        # `except Exception` here would catch EOFError on every iteration
        # and spin forever printing it.
        break
    # Tickers are matched case-insensitively; anything else is tried as a
    # full name exactly as typed.
    crypto = names_by_ticker.get(crypto.upper(), crypto)
    if crypto in ("quit", "stop", "exit"):
        break
    elif crypto in results.index:
        # Look the row up by label instead of a hard-coded row position.
        print(results.loc[crypto])
    else:
        print("No Available Data")
我如何创建一个或多个数据框,使其包含每种加密货币(比特币、以太坊、瑞波币……)在每一年(2015 年、2016 年、2017 年……)的 beta、alpha 和 rsquared 统计数据?
此外,是否可以简化 while True: 部分?它的用途是接收用户输入:您可以键入加密货币名称的任一写法(代号或全称),代码会输出对应的统计数据,并在需要时让您继续选择其他加密货币——目前是用 if、elif 以及 try、except 实现的,如上面的代码所示。
您可以将结果部分包装在一个函数中,然后使用过滤后的数据集调用它。例如,这将为您提供 2020 年的数据。
# Select the 2020 rows by comparing the index year directly rather than
# substring-matching stringified labels.
# Assumes df has a DatetimeIndex, as downloaded price data normally does.
df_2020 = df[df.index.year == 2020]
您可以根据 df_2020 而不是整个 df 数据集来计算结果。要遍历它们,也许您可以创建一系列切片:
# One slice of df per calendar year, 2015 through 2020, selected by the
# index year rather than by substring-matching stringified labels.
# Assumes df has a DatetimeIndex.
df_list = [df[df.index.year == year] for year in range(2015, 2021)]
您的 while 循环可以简单得多。下面是一个可行的思路。请注意,您可以通过名称调用 results.loc[crypto],而不必使用 .iloc 把行号硬编码进去。
# Ticker -> display name; the display names are the row labels of `results`.
abbrev = {
    'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
    'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero',
    'BTG': 'Bitcoin Gold', 'ADA': 'Cardano', 'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin',
}
# Prompt repeatedly for a coin and print its statistics until the user quits.
while True:
    try:
        crypto = input("Cryptocurrency: ").strip()
    except EOFError:
        # stdin was closed: leave the loop instead of crashing.
        break
    # If a ticker was given (in any letter case), translate it to the full
    # name; otherwise keep the input as typed.
    crypto = abbrev.get(crypto.upper(), crypto)
    if crypto in results.index:
        print(results.loc[crypto])
    elif crypto in ("quit", "stop", "exit"):
        break
    else:
        print("No Available Data")