使用 Datetime 索引分析 dataframe 数据
Using Datetime indexing to analyse dataframe data
我正在尝试编写一个程序来计算指定日期和时间内的平均压力、温度和湿度……但不确定为什么我会得到 'nan' 值……?这是我的代码:有什么想法吗?
import pandas as pd
import numpy as np
# Load the logger export.  The path is a raw string because '\U' in a normal
# literal starts a unicode escape on Python 3 and the line would not parse.
# read_csv is used because DataFrame.from_csv was removed in pandas 1.0.
df = pd.read_csv(r'C:\Users\Joey\Desktop\Python\CSV\TM4CVC.csv', index_col=None)

# Keep the three measurement channels plus the raw date/time text columns.
df2 = pd.DataFrame({
    'temp': df['Ch1_Value'],
    'press': df['Ch2_Value'],
    'humid': df['Ch3_Value'],
    'Date': df['Date'],
    'Time': df['Time'],
})

# Join date and time column-wise (no per-row lambda), parse the result once
# and use it as the index, named 'DateTime' as before.
df2.index = pd.DatetimeIndex(
    pd.to_datetime(df2['Date'] + ' ' + df2['Time']),
    name='DateTime',
)

# axis is passed by keyword: the positional form was removed in pandas 2.0.
df3 = df2.drop(['Date', 'Time'], axis=1)
#------------------------------------------------------------------------------
def TempPressHumid(datetime_i, datetime_e, df=None):
    """Print and return the mean and standard deviation over a time window.

    Parameters
    ----------
    datetime_i : str or datetime-like
        First timestamp of the window (label slice on the index).
    datetime_e : str or datetime-like
        Last timestamp of the window (label slice on the index).
    df : pandas.DataFrame, optional
        Frame with 'temp', 'press' and 'humid' columns and a datetime index.
        When omitted, the module-level ``df3`` is used.

    Returns
    -------
    dict
        Keys '<column>_avg' and '<column>_std' for temp, press and humid.
        If the window selects no rows (for example because ``datetime_e``
        is earlier than ``datetime_i``) every value is NaN.
    """
    frame = df3 if df is None else df
    # Slice the rows once and reuse the window for every statistic.
    window = frame.loc[datetime_i:datetime_e]
    out = {}
    for column in ('temp', 'press', 'humid'):
        out[column + '_avg'] = np.mean(window[column])
        out[column + '_std'] = np.std(window[column])
    print(out)
    return out
# NOTE: datetime_e (2012-01-25) is earlier than datetime_i (2012-06-25), so
# the slice selects no rows and every statistic comes out as nan.
TempPressHumid(datetime_i = '2012-06-25 08:27:19', datetime_e = '2012-01-25 10:59:33')
我的输出是:
{'humid_std': nan, 'press_std': nan, 'humid_avg': nan, 'temp_avg': nan, 'temp_std': nan, 'press_avg': nan}
print df3 给我:
humid press temp
DateTime
2012-06-25 08:21:19 1004.0 21.2 26.0
2012-06-25 08:22:19 1004.0 21.2 26.0
2012-06-25 08:23:19 1004.1 21.3 26.0
-----------------------------------------
等...
您可以尝试这样的操作:
# Three columns of 1000 uniform random samples each, drawn in the order
# a, b, c, used as stand-ins for the real measurements.
a, b, c = (pd.Series(np.random.random_sample(1000)) for _ in range(3))
df = pd.DataFrame({"temp": a, "press": b, "humid": c})

# Hourly timestamps starting 2012-06-25, one per row, become the index.
i = pd.date_range('20120625', periods=1000, freq="h")
df.index = pd.to_datetime(i)
此时数据框 df 看起来像这样:
humid press temp
2012-06-25 00:00:00 0.910517 0.588777 ...
2012-06-25 01:00:00 0.742219 0.501180
2012-06-25 02:00:00 0.810515 0.172370
2012-06-25 03:00:00 0.215735 0.046797
2012-06-25 04:00:00 0.094144 0.822310
2012-06-25 05:00:00 0.662934 0.629981
2012-06-25 06:00:00 0.876086 0.586799
...
现在让我们计算所需日期范围的均值和标准差
def TempPressHumid(start, end, df):
    """Print and return the mean and standard deviation over a time window.

    Parameters
    ----------
    start : str or datetime-like
        First timestamp of the window (label slice on the index).
    end : str or datetime-like
        Last timestamp of the window (label slice on the index).
    df : pandas.DataFrame
        Frame with 'temp', 'press' and 'humid' columns and a datetime index.

    Returns
    -------
    dict
        Keys '<column>_mean' and '<column>_std' for temp, press and humid.
        If the window selects no rows (for example because ``end`` is
        earlier than ``start``) every value is NaN.
    """
    # Slice the rows once instead of re-slicing each column twice.
    window = df.loc[start:end]
    values = {}
    for column in ('temp', 'press', 'humid'):
        series = window[column]
        values[column + '_mean'] = np.mean(series)
        values[column + '_std'] = np.std(series)
    print(values)
    return values
因此,如果您调用 TempPressHumid('2012-06-25 08:00:00', '2012-07-25 10:00:00', df),您应该会看到所需值的字典。
请注意,结束时间必须晚于开始时间:问题中的结束日期(2012-01-25)早于开始日期(2012-06-25),因此切片为空,所有结果均为 nan。
我正在尝试编写一个程序来计算指定日期和时间内的平均压力、温度和湿度……但不确定为什么我会得到 'nan' 值……?这是我的代码:有什么想法吗?
import pandas as pd
import numpy as np
# Load the logger export.  The path is a raw string because '\U' in a normal
# literal starts a unicode escape on Python 3 and the line would not parse.
# read_csv is used because DataFrame.from_csv was removed in pandas 1.0.
df = pd.read_csv(r'C:\Users\Joey\Desktop\Python\CSV\TM4CVC.csv', index_col=None)

# Keep the three measurement channels plus the raw date/time text columns.
df2 = pd.DataFrame({
    'temp': df['Ch1_Value'],
    'press': df['Ch2_Value'],
    'humid': df['Ch3_Value'],
    'Date': df['Date'],
    'Time': df['Time'],
})

# Join date and time column-wise (no per-row lambda), parse the result once
# and use it as the index, named 'DateTime' as before.
df2.index = pd.DatetimeIndex(
    pd.to_datetime(df2['Date'] + ' ' + df2['Time']),
    name='DateTime',
)

# axis is passed by keyword: the positional form was removed in pandas 2.0.
df3 = df2.drop(['Date', 'Time'], axis=1)
#------------------------------------------------------------------------------
def TempPressHumid(datetime_i, datetime_e, df=None):
    """Print and return the mean and standard deviation over a time window.

    Parameters
    ----------
    datetime_i : str or datetime-like
        First timestamp of the window (label slice on the index).
    datetime_e : str or datetime-like
        Last timestamp of the window (label slice on the index).
    df : pandas.DataFrame, optional
        Frame with 'temp', 'press' and 'humid' columns and a datetime index.
        When omitted, the module-level ``df3`` is used.

    Returns
    -------
    dict
        Keys '<column>_avg' and '<column>_std' for temp, press and humid.
        If the window selects no rows (for example because ``datetime_e``
        is earlier than ``datetime_i``) every value is NaN.
    """
    frame = df3 if df is None else df
    # Slice the rows once and reuse the window for every statistic.
    window = frame.loc[datetime_i:datetime_e]
    out = {}
    for column in ('temp', 'press', 'humid'):
        out[column + '_avg'] = np.mean(window[column])
        out[column + '_std'] = np.std(window[column])
    print(out)
    return out
# NOTE: datetime_e (2012-01-25) is earlier than datetime_i (2012-06-25), so
# the slice selects no rows and every statistic comes out as nan.
TempPressHumid(datetime_i = '2012-06-25 08:27:19', datetime_e = '2012-01-25 10:59:33')
我的输出是:
{'humid_std': nan, 'press_std': nan, 'humid_avg': nan, 'temp_avg': nan, 'temp_std': nan, 'press_avg': nan}
print df3 给我:
humid press temp
DateTime
2012-06-25 08:21:19 1004.0 21.2 26.0
2012-06-25 08:22:19 1004.0 21.2 26.0
2012-06-25 08:23:19 1004.1 21.3 26.0
-----------------------------------------
等...
您可以尝试这样的操作:
# Three columns of 1000 uniform random samples each, drawn in the order
# a, b, c, used as stand-ins for the real measurements.
a, b, c = (pd.Series(np.random.random_sample(1000)) for _ in range(3))
df = pd.DataFrame({"temp": a, "press": b, "humid": c})

# Hourly timestamps starting 2012-06-25, one per row, become the index.
i = pd.date_range('20120625', periods=1000, freq="h")
df.index = pd.to_datetime(i)
此时数据框 df 看起来像这样:
humid press temp
2012-06-25 00:00:00 0.910517 0.588777 ...
2012-06-25 01:00:00 0.742219 0.501180
2012-06-25 02:00:00 0.810515 0.172370
2012-06-25 03:00:00 0.215735 0.046797
2012-06-25 04:00:00 0.094144 0.822310
2012-06-25 05:00:00 0.662934 0.629981
2012-06-25 06:00:00 0.876086 0.586799
...
现在让我们计算所需日期范围的均值和标准差
def TempPressHumid(start, end, df):
    """Print and return the mean and standard deviation over a time window.

    Parameters
    ----------
    start : str or datetime-like
        First timestamp of the window (label slice on the index).
    end : str or datetime-like
        Last timestamp of the window (label slice on the index).
    df : pandas.DataFrame
        Frame with 'temp', 'press' and 'humid' columns and a datetime index.

    Returns
    -------
    dict
        Keys '<column>_mean' and '<column>_std' for temp, press and humid.
        If the window selects no rows (for example because ``end`` is
        earlier than ``start``) every value is NaN.
    """
    # Slice the rows once instead of re-slicing each column twice.
    window = df.loc[start:end]
    values = {}
    for column in ('temp', 'press', 'humid'):
        series = window[column]
        values[column + '_mean'] = np.mean(series)
        values[column + '_std'] = np.std(series)
    print(values)
    return values
因此,如果您调用 TempPressHumid('2012-06-25 08:00:00', '2012-07-25 10:00:00', df),您应该会看到所需值的字典。
请注意,结束时间必须晚于开始时间:问题中的结束日期(2012-01-25)早于开始日期(2012-06-25),因此切片为空,所有结果均为 nan。