如何使用 matplotlib 为开始时间和结束时间精确到毫秒的任务绘制甘特图

How to draw a Gantt chart using matplotlib for tasks whose start and end times have millisecond precision

我有一个数据框,其中保存了每个任务的数据,包括开始时间、结束时间和状态。我想据此绘制一个甘特图。我尝试参考 Stack Overflow 上的其他问题 (link),但它们使用的是数值,因此无法直接套用。下面是代码。

import pandas as pd   
import matplotlib.pyplot as plt 
# Sample rows: task name, start timestamp, end timestamp, status.
# The timestamps are plain strings with millisecond precision.
data = [['A', '2019-06-27 18:33:58.033', '2019-06-27 19:54:04.658', 'Success'], ['B', '2019-06-27 19:54:04.957', '2019-06-27 19:54:14.570', 'Success'], ['B', '2019-06-27 19:54:04.963', '2019-06-27 19:54:19.928', 'Failed']]
# Converting the list to a dataframe
df = pd.DataFrame(data, columns = ['Task', 'Start Time', 'End Time', 'Status']) 
# Calculating the time difference. Only the new 'Duration' column holds parsed
# values (end minus start); 'Start Time' and 'End Time' are not reassigned and
# therefore remain the original strings.
df['Duration'] = pd.to_datetime(df['End Time']) - pd.to_datetime(df['Start Time'])

# Bar colour per status value.
color = {"Success":"turquoise", "Failed":"crimson"}
fig,ax=plt.subplots(figsize=(6,3))
# Task names in plotting order, used for the y tick labels below.
labels=[]

# One horizontal lane per task (index i); within a lane, one broken_barh call
# per status so that each status gets its own colour.
for i, task in enumerate(df.groupby("Task")):
    # groupby yields (group key, sub-frame) pairs: task[0] is the task name.
    labels.append(task[0])
    for r in task[1].groupby("Status"):
        # Rebinds the name `data` (previously the input list) to a two-column
        # frame of (start, duration) pairs for this task/status combination.
        data = r[1][["Start Time", "Duration"]]
        # NOTE(review): 'Start Time' is still a string here and 'Duration' is a
        # time difference, so the (xmin, xwidth) pairs are not plain numbers.
        # This is presumably why the chart is wrong -- confirm against the
        # broken_barh documentation.
        ax.broken_barh(data.values, (i-0.4,0.8), color=color[r[0]] )

# Centre one tick on each lane and label it with the task name.
ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels) 
# NOTE(review): the label says milliseconds, but nothing above converts the
# x values to milliseconds -- confirm the intended unit.
ax.set_xlabel("time [ms]")
plt.tight_layout()       
plt.show()

它没有显示正确的图表,可能是时间格式的问题。如果我用十进制数代替时间,上面的代码可以正常工作。希望能得到帮助。

我可以在 matplotlib 中使用时间绘制图表,但是无法为成功和失败的条形设置不同的颜色。欢迎提供具备此功能的解决方案。

import pandas as pd    
from datetime import datetime
import matplotlib.dates as dates
import matplotlib.pyplot as plt
# Sample rows: task name, start timestamp, end timestamp, status.
data = [
    ['A', '2019-06-27 18:33:58.033', '2019-06-27 19:54:04.658', 'Success'],
    ['B', '2019-06-27 19:54:04.957', '2019-06-27 19:58:14.570', 'Success'],
    ['C', '2019-06-27 19:54:04.963', '2019-06-27 19:54:19.928', 'Failed'],
]
df = pd.DataFrame(data, columns=['Task', 'Start_Time', 'End_Time', 'Status'])

# df_phase is a second name for the same frame, so the conversions below
# are visible through df as well.
df_phase = df
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
for column in ('Start_Time', 'End_Time'):
    df_phase[column] = pd.to_datetime(df_phase[column], format=TIMESTAMP_FORMAT)

# Pull the columns out as lists and turn the timestamps into matplotlib
# date numbers so they can be subtracted and used as bar positions.
tasks = df_phase['Task'].tolist()
sdate = dates.date2num(df_phase['Start_Time'].tolist())
edate = dates.date2num(df_phase['End_Time'].tolist())
time_diff = edate - sdate

# One bar per row: it starts at the task's start and spans its duration.
ypos = range(len(tasks))
fig, ax = plt.subplots()
ax.barh(
    ypos,
    time_diff,
    left=sdate,
    height=0.8,
    align='center',
    color='blue',
    edgecolor='black',
)
plt.yticks(ypos, tasks)
ax.axis('tight')

# The x values are date numbers; have the axis render them as dates.
ax.xaxis_date()
plt.show()

输出图像:

回答似乎有些晚了,不过下面是您的代码与 Rishi 的代码稍作合并后的版本:

import pandas as pd
import matplotlib.dates as dates
import matplotlib.pyplot as plt

# Sample rows: task name, start timestamp, end timestamp, status.
data = [
    ['A', '2019-06-27 18:33:58.033', '2019-06-27 19:54:04.658', 'Success'],
    ['B', '2019-06-27 19:54:04.957', '2019-06-27 19:54:14.570', 'Success'],
    ['C', '2019-06-27 19:54:04.963', '2019-06-27 20:54:19.928', 'Failed'],
]
# Converting the list to a dataframe
df = pd.DataFrame(data, columns=['Task', 'Start_Time', 'End_Time', 'Status'])

# df_phase is a second name for the same frame (not a copy).
df_phase = df
# Every timestamp carries fractional seconds, so the format needs %f to
# match the whole string.
time_format = '%Y-%m-%d %H:%M:%S.%f'
df_phase['Start_Time'] = pd.to_datetime(df_phase['Start_Time'], format=time_format)
df_phase['End_Time'] = pd.to_datetime(df_phase['End_Time'], format=time_format)

# Bar colour per status value.
color = {"Success": "turquoise", "Failed": "crimson"}

# Convert the timestamps to matplotlib date numbers. Both the start and the
# duration are stored as plain floats so that the (xmin, xwidth) pairs handed
# to broken_barh are in one unit.
sdate = dates.date2num(df_phase['Start_Time'].tolist())
edate = dates.date2num(df_phase['End_Time'].tolist())
df_phase['Start_Num'] = sdate
df_phase['Duration'] = edate - sdate

fig, ax = plt.subplots(figsize=(6, 3))
# Task names in plotting order, used for the y tick labels below.
labels = []

# One horizontal lane per task; within a lane, one broken_barh call per
# status so that each status gets its own colour.
for i, (task_name, task_rows) in enumerate(df_phase.groupby("Task")):
    labels.append(task_name)
    for status, status_rows in task_rows.groupby("Status"):
        xranges = status_rows[["Start_Num", "Duration"]].values
        ax.broken_barh(xranges, (i - 0.4, 0.8), color=color[status])

ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
# The x values are date numbers; have the axis render them as dates.
ax.xaxis_date()
# The axis shows date/time ticks, not a millisecond count.
ax.set_xlabel("time")
plt.tight_layout()
plt.show()