如何在 R 中按时间 'bin' 累计总和 - 使用 ggplot

How to cumulative sum by time 'bin' in R - with ggplot

我正在尝试创建一个如此处所示的累积图,但有一个附加条件:阶梯应该基于 2 分钟的时间间隔,因此一个间隔内可能有多个条目,也可能一个都没有。

我使用 rowSums 为要在 cumsum 中使用的值创建列, 例如,

# For each category, count per row how many columns (all but the first,
# time.bin) hold that category label, and store the count in a column
# named after the category.
df_so[["intraverbal"]] <- rowSums(df_so[, -1, drop = FALSE] == "intraverbal")
df_so[["tact"]] <- rowSums(df_so[, -1, drop = FALSE] == "tact")
df_so[["mand"]] <- rowSums(df_so[, -1, drop = FALSE] == "mand")
df_so[["echoic"]] <- rowSums(df_so[, -1, drop = FALSE] == "echoic")

图表使用 plot 绘制:

# Stair-step plot (type "s") of the running total of the intraverbal column.
# Only y values are supplied, so the x axis is the row index, not the time bin.
plot(cumsum(df_so$intraverbal), type="s")

但是,它有几个不足之处。理想情况下,数据应按 "time bin" 进行统计和标记。至少,时间箱应该显示在 x 轴标签上,但这些时间增量并不是连续的。想必我应该使用 dplyr 或 lapply 来融化(melt)并合并它们 - 但我不确定该怎么做。也许可以像此处所述的那样。

最好用 ggplot 来完成这个,这样不同的 cumsum 可以画在同一个图表上,例如像此处那样,或者像此处那样使用 stat_bin。

这是数据的一个小样本:

# Ten-row sample of the data, in dput() form.
# - time.bin: factor with 124 two-minute labels from "0:00:00" to "4:08:00"
#   (there is no "3:46:00" label). The first seven rows are in bin "0:00:00"
#   and the last three are in bin "4:08:00".
# - Primary.VB / Secondary.VB / Tertiary.VB: factors holding category labels
#   ("" where no category is recorded).
# - intraverbal, tact, mand, echoic: numeric per-row count columns.
df_so <- structure(list(time.bin = structure(c(1L, 1L, 1L, 1L, 1L, 1L,1L, 124L, 124L, 124L), .Label = c("0:00:00", "0:02:00", "0:04:00","0:06:00", "0:08:00", "0:10:00", "0:12:00", "0:14:00", "0:16:00","0:18:00",
         "0:20:00", "0:22:00", "0:24:00", "0:26:00", "0:28:00","0:30:00", "0:32:00", "0:34:00", "0:36:00", "0:38:00", "0:40:00","0:42:00", "0:44:00", "0:46:00", "0:48:00", "0:50:00", "0:52:00","0:54:00", "0:56:00", "0:58:00",
         "1:00:00", "1:02:00", "1:04:00","1:06:00", "1:08:00", "1:10:00", "1:12:00", "1:14:00", "1:16:00","1:18:00", "1:20:00", "1:22:00", "1:24:00", "1:26:00", "1:28:00","1:30:00", "1:32:00", "1:34:00", "1:36:00", "1:38:00",
         "1:40:00","1:42:00", "1:44:00", "1:46:00", "1:48:00", "1:50:00", "1:52:00","1:54:00", "1:56:00", "1:58:00", "2:00:00", "2:02:00", "2:04:00","2:06:00", "2:08:00", "2:10:00", "2:12:00", "2:14:00", "2:16:00","2:18:00",
         "2:20:00", "2:22:00", "2:24:00", "2:26:00", "2:28:00","2:30:00", "2:32:00", "2:34:00", "2:36:00", "2:38:00", "2:40:00","2:42:00", "2:44:00", "2:46:00", "2:48:00", "2:50:00", "2:52:00","2:54:00", "2:56:00", "2:58:00",
         "3:00:00", "3:02:00", "3:04:00","3:06:00", "3:08:00", "3:10:00", "3:12:00", "3:14:00", "3:16:00","3:18:00", "3:20:00", "3:22:00", "3:24:00", "3:26:00", "3:28:00","3:30:00", "3:32:00", "3:34:00", "3:36:00", "3:38:00", "3:40:00","3:42:00", "3:44:00", "3:48:00", "3:50:00", "3:52:00", "3:54:00","3:56:00", "3:58:00", "4:00:00", "4:02:00", "4:04:00", "4:06:00","4:08:00"), class = "factor"),
         Primary.VB = structure(c(1L,3L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L), .Label = c("", "echoic","intraverbal", "mand", "tact"), class = "factor"),
         Secondary.VB = structure(c(1L,1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "echoic","intraverbal", "mand", "tact"), class = "factor"),
         Tertiary.VB = structure(c(1L,1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "intraverbal","mand", "tact"), class = "factor"), intraverbal = c(0, 1, 0,1, 0, 1, 0, 0, 0, 0),
         tact = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0),mand = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
         echoic = c(0, 0,0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("time.bin", "Primary.VB","Secondary.VB","Tertiary.VB","intraverbal",
         "tact", "mand", "echoic"), row.names = c(1L, 2L,3L, 4L, 5L, 6L, 7L, 1648L, 1649L, 1650L), class = "data.frame") 

这不是答案,只是一条我稍后会删除的扩展评论。如果我们暂时忽略 x 轴显示的是因子的整数编码……这样看起来可以吗?

# Step plot of the cumulative intraverbal count across time bins.
# Requires dplyr and ggplot2 to be attached.
df_so %>%
  # tbl_df() is deprecated since dplyr 1.0.0; as_tibble() is its replacement.
  as_tibble() %>%
  group_by(time.bin) %>%
  # One row per bin that has data: the bin's total. This equals the last value
  # of a within-bin cumsum().
  summarise(bin_total = sum(intraverbal)) %>%
  mutate(
    # Running total across bins, in factor-level order.
    tCsum = cumsum(bin_total),
    # Factor -> integer level code, so x is the bin's position among the
    # factor levels rather than a real time value.
    time.bin = as.numeric(time.bin)
  ) %>%
  ggplot(aes(time.bin, tCsum)) +
  geom_step()