Pandas:使用 asfreq 时出现 “Must pass DataFrame with boolean values only” 错误
Pandas: "Must pass DataFrame with boolean values only" error when using asfreq
我有以下代码,它会抛出一个非常奇怪的错误。我的目标是对数据中的缺失值进行回填,并给回填的值打上不同的标签。
错误发生在 df_filled[is_filled] 这一行。
如果我把对应的那一行改为 df_filled=df.asfreq(freq='D').fillna(method='bfill', limit=1).dropna(how='all').drop_duplicates(keep='last'),一切正常;但是使用 freq='2D' 时,用于索引的 is_filled 不再是纯布尔形式,df_filled[is_filled] 就会报错。
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import random
## Generate the data
# Seed NumPy's RNG so the random draws are repeatable. The dates are still
# anchored to datetime.now(), so the resulting index differs between runs.
np.random.seed(11)
date_today = datetime.now()
ndays = 15
# df: row x is offset from date_today by x days scaled by a freshly drawn
# non-negative random factor; 'test'/'test2' hold random normal values.
df = pd.DataFrame({'date': [date_today + timedelta(days=(abs(np.random.randn(1))*2)[0]*x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
# df1: rows spaced one hour apart starting at date_today.
df1=pd.DataFrame({'date': [date_today + timedelta(hours=x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
# df2: rows spaced one day apart, each shifted back by 100*x seconds.
df2=pd.DataFrame({'date': [date_today + timedelta(days=x)-timedelta(seconds=100*x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
# Stack the three frames into one.
df=df.append(df1)
df=df.append(df2)
df = df.set_index('date').sort_index()
# Blank out every cell whose random draw is below 0.7 to simulate missing data.
df = df.mask(np.random.random(df.shape) < .7)
df=df.reset_index()
# Turn the value columns into strings, then map the literal 'nan' strings
# produced by that cast back to real missing values.
df['test']=df['test'].astype(str)
df['test2']=df['test2'].astype(str)
df.replace('nan', np.nan, inplace = True)
## Resample and backfill
# Index by the calendar date of 'date' (the column itself is kept) and keep
# only the first row for each date.
df.set_index(df['date'].dt.date, inplace = True)
df = df[~df.index.duplicated(keep='first')]
# Conform to a 2-day frequency, backfill with a limit of 2, drop rows that are
# entirely missing, and drop duplicated rows keeping the last occurrence.
df_filled=df.asfreq(freq='2D').fillna(method='bfill', limit=2).dropna(how='all').drop_duplicates(keep='last')
# Re-index both frames by the 'date' column and remove that column.
df_filled.set_index(df_filled['date'],inplace=True)
df_filled=df_filled.drop('date',1)
df.set_index(df['date'],inplace=True)
df=df.drop('date',1)
# Mask: cells where df has a value, or where df is missing but df_filled is not.
# NOTE(review): df and df_filled do not necessarily share the same index here,
# so the operators align on labels; unmatched labels presumably yield NaN
# instead of a boolean - confirm, it is consistent with the ValueError reported.
is_filled = (df.isnull() & df_filled.notnull()) | df.notnull()
df_filled[is_filled] ## error happens here
# Replace every selected non-null cell of df_filled with the label '_2D'.
df_filled[is_filled]=df_filled[is_filled].applymap(lambda x: '_2D' if pd.notnull(x) else np.nan)
输出:
ValueError: Must pass DataFrame with boolean values only
在此先感谢您的帮助。
如果你在 is_filled = (df.isnull() & df_filled.notnull()) | df.notnull() 之后执行 print(is_filled),就会看到其中混有 True 和 NaN。所以解决方案是将 NaN 值替换为 False:
底部代码片段:
# Tail of the script with the mask forced to a strictly boolean dtype.
# `axis` is passed by keyword: pandas >= 2.0 no longer accepts it positionally.
df=df.drop('date', axis=1)
# True where df already has a value, or where df is missing but df_filled is not.
is_filled = (df.isnull() & df_filled.notnull()) | df.notnull()
# The mask can contain NaN next to True (presumably for labels that exist in
# only one of the two frames). Replace NaN with False first, then cast
# explicitly so the dtype is bool even when fillna leaves an object column.
is_filled = is_filled.fillna(False).astype(bool) # Fix here
# Replace every selected non-null cell of df_filled with the label '_2D'.
df_filled[is_filled]=df_filled[is_filled].applymap(lambda x: '_2D' if pd.notnull(x) else np.nan)
我有以下代码,它会抛出一个非常奇怪的错误。我的目标是对数据中的缺失值进行回填,并给回填的值打上不同的标签。
错误发生在 df_filled[is_filled] 这一行。
如果我把对应的那一行改为 df_filled=df.asfreq(freq='D').fillna(method='bfill', limit=1).dropna(how='all').drop_duplicates(keep='last'),一切正常;但是使用 freq='2D' 时,用于索引的 is_filled 不再是纯布尔形式,df_filled[is_filled] 就会报错。
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import random
## Generate the data
# Seed NumPy's RNG so the random draws are repeatable. The dates are still
# anchored to datetime.now(), so the resulting index differs between runs.
np.random.seed(11)
date_today = datetime.now()
ndays = 15
# df: row x is offset from date_today by x days scaled by a freshly drawn
# non-negative random factor; 'test'/'test2' hold random normal values.
df = pd.DataFrame({'date': [date_today + timedelta(days=(abs(np.random.randn(1))*2)[0]*x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
# df1: rows spaced one hour apart starting at date_today.
df1=pd.DataFrame({'date': [date_today + timedelta(hours=x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
# df2: rows spaced one day apart, each shifted back by 100*x seconds.
df2=pd.DataFrame({'date': [date_today + timedelta(days=x)-timedelta(seconds=100*x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
# Stack the three frames into one.
df=df.append(df1)
df=df.append(df2)
df = df.set_index('date').sort_index()
# Blank out every cell whose random draw is below 0.7 to simulate missing data.
df = df.mask(np.random.random(df.shape) < .7)
df=df.reset_index()
# Turn the value columns into strings, then map the literal 'nan' strings
# produced by that cast back to real missing values.
df['test']=df['test'].astype(str)
df['test2']=df['test2'].astype(str)
df.replace('nan', np.nan, inplace = True)
## Resample and backfill
# Index by the calendar date of 'date' (the column itself is kept) and keep
# only the first row for each date.
df.set_index(df['date'].dt.date, inplace = True)
df = df[~df.index.duplicated(keep='first')]
# Conform to a 2-day frequency, backfill with a limit of 2, drop rows that are
# entirely missing, and drop duplicated rows keeping the last occurrence.
df_filled=df.asfreq(freq='2D').fillna(method='bfill', limit=2).dropna(how='all').drop_duplicates(keep='last')
# Re-index both frames by the 'date' column and remove that column.
df_filled.set_index(df_filled['date'],inplace=True)
df_filled=df_filled.drop('date',1)
df.set_index(df['date'],inplace=True)
df=df.drop('date',1)
# Mask: cells where df has a value, or where df is missing but df_filled is not.
# NOTE(review): df and df_filled do not necessarily share the same index here,
# so the operators align on labels; unmatched labels presumably yield NaN
# instead of a boolean - confirm, it is consistent with the ValueError reported.
is_filled = (df.isnull() & df_filled.notnull()) | df.notnull()
df_filled[is_filled] ## error happens here
# Replace every selected non-null cell of df_filled with the label '_2D'.
df_filled[is_filled]=df_filled[is_filled].applymap(lambda x: '_2D' if pd.notnull(x) else np.nan)
输出:
ValueError: Must pass DataFrame with boolean values only
在此先感谢您的帮助。
如果你在 is_filled = (df.isnull() & df_filled.notnull()) | df.notnull() 之后执行 print(is_filled),就会看到其中混有 True 和 NaN。所以解决方案是将 NaN 值替换为 False:
底部代码片段:
# Tail of the script with the mask forced to a strictly boolean dtype.
# `axis` is passed by keyword: pandas >= 2.0 no longer accepts it positionally.
df=df.drop('date', axis=1)
# True where df already has a value, or where df is missing but df_filled is not.
is_filled = (df.isnull() & df_filled.notnull()) | df.notnull()
# The mask can contain NaN next to True (presumably for labels that exist in
# only one of the two frames). Replace NaN with False first, then cast
# explicitly so the dtype is bool even when fillna leaves an object column.
is_filled = is_filled.fillna(False).astype(bool) # Fix here
# Replace every selected non-null cell of df_filled with the label '_2D'.
df_filled[is_filled]=df_filled[is_filled].applymap(lambda x: '_2D' if pd.notnull(x) else np.nan)