如何使用 Pandas 从非常大的时间序列中自动绘制多个 'chunks' 数据?
How can I automate the plotting of multiple 'chunks' of data from a very large time-series using Pandas?
我的目标是能够从名为 'parsed.csv' 的大型时间序列数据集中,为 'event.csv' 中的每个事件生成时间序列图。
我能够通过根据需要手动定义具有 +/- 12 小时缓冲区的事件的所需时间范围来成功绘制单个事件。有数百个事件,使得某种自动化成为必要。我是 loops/automation 的新手,一直非常困惑。
代码:
import matplotlib.pyplot as plt
import pandas as pd
# Load the event list and the full time series; both use their 'Date_Time'
# column, parsed as datetimes, as the index.
# Fix: parse_dates must be passed as a keyword (parse_dates=[...]); the
# original `parse_dates['Date_Time']` subscripted an undefined name.
df_event = pd.read_csv('event.csv', parse_dates=['Date_Time'], index_col=['Date_Time'])
df = pd.read_csv('parsed.csv', parse_dates=['Date_Time'], index_col=['Date_Time'])

# Convert 'Verified' to numeric; errors='coerce' turns unparseable entries into NaN.
df.Verified = pd.to_numeric(df.Verified, errors='coerce')
# Drop every row that contains a NaN in any column.
df.dropna(axis='index', how='any', inplace=True)

# Manually chosen date range covering a single event.
df = df.loc['2018-05-01':'2018-05-06']

# One figure/axes pair; all columns of the selected range are drawn on it.
fig, axs = plt.subplots(figsize=(12, 6))
df.plot(ax=axs)
Sample of my large time-series csv dataset:
Predicted Verified
Date_Time
2010-01-01 00:00:00 5.161 5.56
2010-01-01 00:06:00 5.187 5.57
2010-01-01 00:12:00 5.208 5.56
2010-01-01 00:18:00 5.222 5.55
2010-01-01 00:24:00 5.230 5.53
... ...
2020-12-31 23:30:00 3.342 3.81
2020-12-31 23:36:00 3.447 3.92
2020-12-31 23:42:00 3.549 4.03
2020-12-31 23:48:00 3.646 4.14
2020-12-31 23:54:00 3.739 4.24
Event.csv sample:
Verified
Date_Time
2010-01-06 12:05:00 5.161
2010-03-13 02:06:00 5.187
2010-07-24 06:13:00 5.208
这是一种使用 Bokeh 制作有趣的交互式图表的方法。
import pandas as pd
from pandas import Timestamp
from bokeh.plotting import figure, show, output_file, ColumnDataSource, save
from bokeh.models import WheelZoomTool
from bokeh.models import PanTool
from bokeh.models import ResetTool
from bokeh.models import SaveTool
from bokeh.models import BoxZoomTool
from bokeh.models import CrosshairTool
from bokeh.models import HoverTool
# Sample of the large time series, one tuple per reading:
# (Date_Time, Predicted, Verified).
_sample_rows = [
    ('2010-01-01 00:00:00', 5.161, 5.56),
    ('2010-01-01 00:06:00', 5.187, 5.57),
    ('2010-01-01 00:12:00', 5.208, 5.56),
    ('2010-01-01 00:18:00', 5.222, 5.55),
    ('2010-01-01 00:24:00', 5.23, 5.53),
    ('2020-12-31 23:30:00', 3.342, 3.81),
    ('2020-12-31 23:36:00', 3.447, 3.92),
    ('2020-12-31 23:42:00', 3.549, 4.03),
    ('2020-12-31 23:48:00', 3.646, 4.14),
    ('2020-12-31 23:54:00', 3.739, 4.24),
]

# Transpose the rows into {column name: {row number: value}}.
df_dict = {
    column: dict(enumerate(values))
    for column, values in zip(('Date_Time', 'Predicted', 'Verified'),
                              zip(*_sample_rows))
}

# Build the frame, parse the timestamp strings, and index by them.
df = pd.DataFrame(df_dict)
df['Date_Time'] = pd.to_datetime(df['Date_Time'])
df = df.set_index('Date_Time')
def do_bokeh_plot(df, idx):
    """Plot 'Predicted' and 'Verified' from ``df`` and save the figure as HTML.

    Args:
        df: DataFrame with 'Predicted' and 'Verified' columns. Its index is
            expected to be named 'Date_Time', since that name is used as the
            x column. The frame is not modified.
        idx: Timestamp-like object with ``strftime``; it names the output
            file as ``idx.strftime('%y%m%d-%H%M%S') + ".html"``.
    """
    # Work on a copy: the caller usually passes a slice of a larger frame,
    # and adding a column to it would mutate the caller's data and can
    # trigger pandas' SettingWithCopyWarning.
    plot_df = df.copy()
    # Bokeh likes string for tooltips
    plot_df['Date'] = plot_df.index.astype(str)
    # Convert once and share between both lines instead of letting Bokeh
    # convert the DataFrame for every glyph.
    source = ColumnDataSource(plot_df)

    # width/height replace plot_width/plot_height, which Bokeh 3 removed.
    p = figure(width=1200,
               height=800,
               x_axis_type="datetime",
               y_range=(1, 10),
               title="sample plot")

    col_list = ['Predicted', 'Verified']  # or df.columns
    color_list = ['red', 'blue']

    for col, color in zip(col_list, color_list):
        rend = p.line(x='Date_Time',
                      y=col,
                      source=source,
                      legend_label=col,
                      color=color,
                      line_width=1.5)
        # One hover tool per line so the tooltip shows that line's value.
        p.add_tools(HoverTool(renderers=[rend],
                              tooltips=[("Value", "@{" + col + "}"),
                                        ("Date_Time", "@{Date}")],
                              mode='mouse'))

    p.legend.click_policy = "hide"
    p.yaxis.axis_label = 'values'
    p.xaxis.axis_label = 'Date'

    # you might get a lot of plots in your browser with this
    # show(p)

    # Save each plot with name as idx value from event_df. output_file only
    # sets the destination; save() is what actually writes the file.
    output_file(idx.strftime('%y%m%d-%H%M%S') + ".html")
    save(p)
# Sample events, one (timestamp, Verified value) pair per event.
_event_readings = [
    ('2010-01-01 00:00:00', 5.56),
    ('2010-01-01 00:06:00', 5.57),
    ('2010-01-01 00:12:00', 5.56),
    ('2010-01-01 00:18:00', 5.55),
    ('2010-01-01 00:24:00', 5.53),
    ('2020-12-31 23:30:00', 3.81),
    ('2020-12-31 23:36:00', 3.92),
    ('2020-12-31 23:42:00', 4.03),
    ('2020-12-31 23:48:00', 4.14),
    ('2020-12-31 23:54:00', 4.24),
]

# {column name: {Timestamp: value}}; the Timestamp keys become the index.
event_df_dict = {
    'Verified': {Timestamp(stamp): value for stamp, value in _event_readings},
}
event_df = pd.DataFrame(event_df_dict)
# Half-width of the window plotted around each event.
half_window = pd.to_timedelta(12, 'h')

for idx in event_df.index:
    # Select the slice of df around this event: later than 12 h before it,
    # and no later than 12 h after it.
    after_start = df.index > idx - half_window
    up_to_end = df.index <= idx + half_window
    df_window = df.loc[after_start & up_to_end]
    do_bokeh_plot(df_window, idx)
我的目标是能够从名为 'parsed.csv' 的大型时间序列数据集中,为 'event.csv' 中的每个事件生成时间序列图。
我能够通过根据需要手动定义具有 +/- 12 小时缓冲区的事件的所需时间范围来成功绘制单个事件。有数百个事件,使得某种自动化成为必要。我是 loops/automation 的新手,一直非常困惑。
代码:
import matplotlib.pyplot as plt
import pandas as pd
# Load the event list and the full time series; both use their 'Date_Time'
# column, parsed as datetimes, as the index.
# Fix: parse_dates must be passed as a keyword (parse_dates=[...]); the
# original `parse_dates['Date_Time']` subscripted an undefined name.
df_event = pd.read_csv('event.csv', parse_dates=['Date_Time'], index_col=['Date_Time'])
df = pd.read_csv('parsed.csv', parse_dates=['Date_Time'], index_col=['Date_Time'])

# Convert 'Verified' to numeric; errors='coerce' turns unparseable entries into NaN.
df.Verified = pd.to_numeric(df.Verified, errors='coerce')
# Drop every row that contains a NaN in any column.
df.dropna(axis='index', how='any', inplace=True)

# Manually chosen date range covering a single event.
df = df.loc['2018-05-01':'2018-05-06']

# One figure/axes pair; all columns of the selected range are drawn on it.
fig, axs = plt.subplots(figsize=(12, 6))
df.plot(ax=axs)
Sample of my large time-series csv dataset:
Predicted Verified
Date_Time
2010-01-01 00:00:00 5.161 5.56
2010-01-01 00:06:00 5.187 5.57
2010-01-01 00:12:00 5.208 5.56
2010-01-01 00:18:00 5.222 5.55
2010-01-01 00:24:00 5.230 5.53
... ...
2020-12-31 23:30:00 3.342 3.81
2020-12-31 23:36:00 3.447 3.92
2020-12-31 23:42:00 3.549 4.03
2020-12-31 23:48:00 3.646 4.14
2020-12-31 23:54:00 3.739 4.24
Event.csv sample:
Verified
Date_Time
2010-01-06 12:05:00 5.161
2010-03-13 02:06:00 5.187
2010-07-24 06:13:00 5.208
这是一种使用 Bokeh 制作有趣的交互式图表的方法。
import pandas as pd
from pandas import Timestamp
from bokeh.plotting import figure, show, output_file, ColumnDataSource, save
from bokeh.models import WheelZoomTool
from bokeh.models import PanTool
from bokeh.models import ResetTool
from bokeh.models import SaveTool
from bokeh.models import BoxZoomTool
from bokeh.models import CrosshairTool
from bokeh.models import HoverTool
# Sample of the large time series, one tuple per reading:
# (Date_Time, Predicted, Verified).
_sample_rows = [
    ('2010-01-01 00:00:00', 5.161, 5.56),
    ('2010-01-01 00:06:00', 5.187, 5.57),
    ('2010-01-01 00:12:00', 5.208, 5.56),
    ('2010-01-01 00:18:00', 5.222, 5.55),
    ('2010-01-01 00:24:00', 5.23, 5.53),
    ('2020-12-31 23:30:00', 3.342, 3.81),
    ('2020-12-31 23:36:00', 3.447, 3.92),
    ('2020-12-31 23:42:00', 3.549, 4.03),
    ('2020-12-31 23:48:00', 3.646, 4.14),
    ('2020-12-31 23:54:00', 3.739, 4.24),
]

# Transpose the rows into {column name: {row number: value}}.
df_dict = {
    column: dict(enumerate(values))
    for column, values in zip(('Date_Time', 'Predicted', 'Verified'),
                              zip(*_sample_rows))
}

# Build the frame, parse the timestamp strings, and index by them.
df = pd.DataFrame(df_dict)
df['Date_Time'] = pd.to_datetime(df['Date_Time'])
df = df.set_index('Date_Time')
def do_bokeh_plot(df, idx):
    """Plot 'Predicted' and 'Verified' from ``df`` and save the figure as HTML.

    Args:
        df: DataFrame with 'Predicted' and 'Verified' columns. Its index is
            expected to be named 'Date_Time', since that name is used as the
            x column. The frame is not modified.
        idx: Timestamp-like object with ``strftime``; it names the output
            file as ``idx.strftime('%y%m%d-%H%M%S') + ".html"``.
    """
    # Work on a copy: the caller usually passes a slice of a larger frame,
    # and adding a column to it would mutate the caller's data and can
    # trigger pandas' SettingWithCopyWarning.
    plot_df = df.copy()
    # Bokeh likes string for tooltips
    plot_df['Date'] = plot_df.index.astype(str)
    # Convert once and share between both lines instead of letting Bokeh
    # convert the DataFrame for every glyph.
    source = ColumnDataSource(plot_df)

    # width/height replace plot_width/plot_height, which Bokeh 3 removed.
    p = figure(width=1200,
               height=800,
               x_axis_type="datetime",
               y_range=(1, 10),
               title="sample plot")

    col_list = ['Predicted', 'Verified']  # or df.columns
    color_list = ['red', 'blue']

    for col, color in zip(col_list, color_list):
        rend = p.line(x='Date_Time',
                      y=col,
                      source=source,
                      legend_label=col,
                      color=color,
                      line_width=1.5)
        # One hover tool per line so the tooltip shows that line's value.
        p.add_tools(HoverTool(renderers=[rend],
                              tooltips=[("Value", "@{" + col + "}"),
                                        ("Date_Time", "@{Date}")],
                              mode='mouse'))

    p.legend.click_policy = "hide"
    p.yaxis.axis_label = 'values'
    p.xaxis.axis_label = 'Date'

    # you might get a lot of plots in your browser with this
    # show(p)

    # Save each plot with name as idx value from event_df. output_file only
    # sets the destination; save() is what actually writes the file.
    output_file(idx.strftime('%y%m%d-%H%M%S') + ".html")
    save(p)
# Sample events, one (timestamp, Verified value) pair per event.
_event_readings = [
    ('2010-01-01 00:00:00', 5.56),
    ('2010-01-01 00:06:00', 5.57),
    ('2010-01-01 00:12:00', 5.56),
    ('2010-01-01 00:18:00', 5.55),
    ('2010-01-01 00:24:00', 5.53),
    ('2020-12-31 23:30:00', 3.81),
    ('2020-12-31 23:36:00', 3.92),
    ('2020-12-31 23:42:00', 4.03),
    ('2020-12-31 23:48:00', 4.14),
    ('2020-12-31 23:54:00', 4.24),
]

# {column name: {Timestamp: value}}; the Timestamp keys become the index.
event_df_dict = {
    'Verified': {Timestamp(stamp): value for stamp, value in _event_readings},
}
event_df = pd.DataFrame(event_df_dict)
# Half-width of the window plotted around each event.
half_window = pd.to_timedelta(12, 'h')

for idx in event_df.index:
    # Select the slice of df around this event: later than 12 h before it,
    # and no later than 12 h after it.
    after_start = df.index > idx - half_window
    up_to_end = df.index <= idx + half_window
    df_window = df.loc[after_start & up_to_end]
    do_bokeh_plot(df_window, idx)