如何在不更改数据框核心的情况下对数据框进行重新采样?
How to resample the dataframe without changing its core?
如何在不改变其核心的情况下对数据帧进行重新采样?
import sys

import pandas as pd

# StringIO lives in the ``StringIO`` module on Python 2 and in ``io`` on
# Python 3; pick whichever matches the running interpreter.
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO

# Inline CSV sample (a ``date`` column plus three value columns) so the
# example runs without any external file.
csvdata = StringIO("""date,LASTA,LASTB,LASTC
1999-03-15,2.5597,8.20145,16.900
1999-03-16,2.6349,8.03439,17.150
1999-03-17,2.6375,8.12431,17.125
1999-03-18,2.6375,8.27908,16.950
1999-03-19,2.6634,8.54914,17.325
1999-03-22,2.6721,8.32183,17.195
1999-03-23,2.6998,8.21218,16.725
1999-03-24,2.6773,8.15284,16.350
1999-03-25,2.6807,8.08378,17.030
1999-03-26,2.7802,8.14038,16.725
1999-03-29,2.8139,8.07832,16.800
1999-03-30,2.8105,8.10124,16.775
1999-03-31,2.7724,7.73057,16.955
1999-04-01,2.8321,7.63714,17.500
1999-04-06,2.8537,7.63703,17.750""")

# Use the ``date`` column as the index and parse it while reading, so the
# resulting index is a DatetimeIndex. ``infer_datetime_format`` is left out
# on purpose: it is deprecated since pandas 2.0 and ISO dates parse without it.
df = pd.read_csv(csvdata, sep=",", index_col="date", parse_dates=True)
这是我的代码...
# Place the three per-stock frames side by side (column-wise), then keep
# only the rows in which every column has a value.
stock_frames = [AAAA, BBBB, CCCC]
combined = pd.concat(stock_frames, axis=1)
full_df = combined.dropna()

# Group the rows on the 'BMS' frequency (pandas' business-month-start alias)
# and keep the first row of each group. This step needs a datetime-like index.
month_groups = full_df.resample('BMS')
monthly_df = month_groups.first()

# Row-over-row percentage change of the daily data.
returns_daily = full_df.pct_change()

# Row-over-row percentage change of the monthly data; the first row has no
# predecessor and comes out as NaN, so it is dropped.
monthly_change = monthly_df.pct_change()
returns_monthly = monthly_change.dropna()

print(returns_monthly.tail())
这是我得到的错误...
TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'
我已经尝试过 pd.DataFrame、DatetimeIndex,甚至 pd.to_datetime,但不知何故只会让事情变得更糟。
问题是串联数据帧的索引不是可重采样的类型。这是 MCVE 的工作版本,展示了如何将数据索引转换为可重新采样的类型。
import sys

import pandas as pd

# StringIO lives in the ``StringIO`` module on Python 2 and in ``io`` on
# Python 3; pick whichever matches the running interpreter.
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO

# Inline CSV sample (a ``date`` column plus three value columns) so the
# example runs without any external file.
csvdata = StringIO("""date,LASTA,LASTB,LASTC
1999-03-15,2.5597,8.20145,16.900
1999-03-16,2.6349,8.03439,17.150
1999-03-17,2.6375,8.12431,17.125
1999-03-18,2.6375,8.27908,16.950
1999-03-19,2.6634,8.54914,17.325
1999-03-22,2.6721,8.32183,17.195
1999-03-23,2.6998,8.21218,16.725
1999-03-24,2.6773,8.15284,16.350
1999-03-25,2.6807,8.08378,17.030
1999-03-26,2.7802,8.14038,16.725
1999-03-29,2.8139,8.07832,16.800
1999-03-30,2.8105,8.10124,16.775
1999-03-31,2.7724,7.73057,16.955
1999-04-01,2.8321,7.63714,17.500
1999-04-06,2.8537,7.63703,17.750""")

# NOTE: passing index_col="date", parse_dates=True to read_csv would build the
# datetime index in one step. The CSV is read *without* date parsing here to
# reproduce the situation from the question: an index of plain strings.
df = pd.read_csv(csvdata, sep=",")
df = df.set_index("date")
print(type(df.index))  # still a generic Index; resample() rejects it

# The actual fix: convert the string labels to datetimes so the frame gets a
# DatetimeIndex, which resample() accepts.
df.index = pd.to_datetime(df.index)
print(type(df.index))

# Join 3 stock DataFrames together (the same sample frame stands in for each
# of them) and keep only the rows in which every column has a value.
full_df = pd.concat([df, df, df], axis=1).dropna()

# Resample the full DataFrame to a monthly timeframe: group on the 'BMS'
# (business month start) frequency and take the first row of each group.
monthly_df = full_df.resample('BMS').first()

# Calculate daily returns of the stocks.
returns_daily = full_df.pct_change()

# Calculate monthly returns of the stocks; the first row has no predecessor
# and comes out as NaN, so it is dropped.
returns_monthly = monthly_df.pct_change().dropna()
请将索引转换为日期时间索引:
# Convert the existing index labels to datetimes so full_df gets a
# DatetimeIndex; resample() raises the TypeError above on a plain Index.
full_df.index = pd.to_datetime(full_df.index)
如何在不改变其核心的情况下对数据帧进行重新采样?
import sys

import pandas as pd

# StringIO lives in the ``StringIO`` module on Python 2 and in ``io`` on
# Python 3; pick whichever matches the running interpreter.
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO

# Inline CSV sample (a ``date`` column plus three value columns) so the
# example runs without any external file.
csvdata = StringIO("""date,LASTA,LASTB,LASTC
1999-03-15,2.5597,8.20145,16.900
1999-03-16,2.6349,8.03439,17.150
1999-03-17,2.6375,8.12431,17.125
1999-03-18,2.6375,8.27908,16.950
1999-03-19,2.6634,8.54914,17.325
1999-03-22,2.6721,8.32183,17.195
1999-03-23,2.6998,8.21218,16.725
1999-03-24,2.6773,8.15284,16.350
1999-03-25,2.6807,8.08378,17.030
1999-03-26,2.7802,8.14038,16.725
1999-03-29,2.8139,8.07832,16.800
1999-03-30,2.8105,8.10124,16.775
1999-03-31,2.7724,7.73057,16.955
1999-04-01,2.8321,7.63714,17.500
1999-04-06,2.8537,7.63703,17.750""")

# Use the ``date`` column as the index and parse it while reading, so the
# resulting index is a DatetimeIndex. ``infer_datetime_format`` is left out
# on purpose: it is deprecated since pandas 2.0 and ISO dates parse without it.
df = pd.read_csv(csvdata, sep=",", index_col="date", parse_dates=True)
这是我的代码...
# Place the three per-stock frames side by side (column-wise), then keep
# only the rows in which every column has a value.
stock_frames = [AAAA, BBBB, CCCC]
combined = pd.concat(stock_frames, axis=1)
full_df = combined.dropna()

# Group the rows on the 'BMS' frequency (pandas' business-month-start alias)
# and keep the first row of each group. This step needs a datetime-like index.
month_groups = full_df.resample('BMS')
monthly_df = month_groups.first()

# Row-over-row percentage change of the daily data.
returns_daily = full_df.pct_change()

# Row-over-row percentage change of the monthly data; the first row has no
# predecessor and comes out as NaN, so it is dropped.
monthly_change = monthly_df.pct_change()
returns_monthly = monthly_change.dropna()

print(returns_monthly.tail())
这是我得到的错误...
TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'
我已经尝试过 pd.DataFrame、DatetimeIndex,甚至 pd.to_datetime,但不知何故只会让事情变得更糟。
问题是串联数据帧的索引不是可重采样的类型。这是 MCVE 的工作版本,展示了如何将数据索引转换为可重新采样的类型。
import sys

import pandas as pd

# StringIO lives in the ``StringIO`` module on Python 2 and in ``io`` on
# Python 3; pick whichever matches the running interpreter.
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO

# Inline CSV sample (a ``date`` column plus three value columns) so the
# example runs without any external file.
csvdata = StringIO("""date,LASTA,LASTB,LASTC
1999-03-15,2.5597,8.20145,16.900
1999-03-16,2.6349,8.03439,17.150
1999-03-17,2.6375,8.12431,17.125
1999-03-18,2.6375,8.27908,16.950
1999-03-19,2.6634,8.54914,17.325
1999-03-22,2.6721,8.32183,17.195
1999-03-23,2.6998,8.21218,16.725
1999-03-24,2.6773,8.15284,16.350
1999-03-25,2.6807,8.08378,17.030
1999-03-26,2.7802,8.14038,16.725
1999-03-29,2.8139,8.07832,16.800
1999-03-30,2.8105,8.10124,16.775
1999-03-31,2.7724,7.73057,16.955
1999-04-01,2.8321,7.63714,17.500
1999-04-06,2.8537,7.63703,17.750""")

# NOTE: passing index_col="date", parse_dates=True to read_csv would build the
# datetime index in one step. The CSV is read *without* date parsing here to
# reproduce the situation from the question: an index of plain strings.
df = pd.read_csv(csvdata, sep=",")
df = df.set_index("date")
print(type(df.index))  # still a generic Index; resample() rejects it

# The actual fix: convert the string labels to datetimes so the frame gets a
# DatetimeIndex, which resample() accepts.
df.index = pd.to_datetime(df.index)
print(type(df.index))

# Join 3 stock DataFrames together (the same sample frame stands in for each
# of them) and keep only the rows in which every column has a value.
full_df = pd.concat([df, df, df], axis=1).dropna()

# Resample the full DataFrame to a monthly timeframe: group on the 'BMS'
# (business month start) frequency and take the first row of each group.
monthly_df = full_df.resample('BMS').first()

# Calculate daily returns of the stocks.
returns_daily = full_df.pct_change()

# Calculate monthly returns of the stocks; the first row has no predecessor
# and comes out as NaN, so it is dropped.
returns_monthly = monthly_df.pct_change().dropna()
请将索引转换为日期时间索引:
# Convert the existing index labels to datetimes so full_df gets a
# DatetimeIndex; resample() raises the TypeError above on a plain Index.
full_df.index = pd.to_datetime(full_df.index)