r 中的用户登录时间图

Graph for user login time in r

所以我有三列,ID,用户登录时间,以及他登录了多长时间(以秒为单位)。这是它的一个示例(总用户超过 4000):

# Sample of the login data: 17 sessions, one row per user.
structure(
  list(
    # User identifiers "id_1" through "id_17".
    id = paste0("id_", 1:17),
    # Login time as an hms/difftime value in seconds
    # (e.g. 37176 s corresponds to 10:19:36).
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846,
        30588, 31303, 37312, 30849, 38466, 30683,
        38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"),
      units = "secs"
    ),
    # Session length in seconds.
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L,
      1881L, 2295L, 3770L, 3350L, 3626L, 2525L,
      3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  # The list names above already provide the column names.
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

我正在尝试创建一个图表来显示在任何给定时间有多少用户,间隔为 X(例如 5 分钟)。
比如 9:00 有 X 个用户,
9:05 有 Y 个用户。
关于在任何给定时间有多少连接的直方图或类似的东西。
我的主要问题是在用户注销时删除他们。

我觉得答案应该很明显,但我就是找不到,也不知道该搜索什么。

这是一个简单的方案:

library(ggplot2)
library(RColorBrewer)

# Colour ramp interpolated from the 11-colour "Spectral" ColorBrewer palette;
# myPalette(n) returns n colours.
myPalette <- colorRampPalette(brewer.pal(11, "Spectral"))

# Sample login data: one row per session with the user id, the login time
# (hms/difftime, in seconds) and the session length in seconds.
data <- structure(
  list(
    id = c(
      "id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
      "id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
    ),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
        38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"), units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
      3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  .Names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Order the id factor levels by first appearance so the y axis follows the
# data order. unique() is required because factor() rejects duplicated
# levels, which would happen as soon as a user has more than one session.
data$id <- factor(data$id, levels = unique(data$id))

# One horizontal segment per session, running from login to logout
# (time_start + time_sec) and coloured by session length.
ggplot(data) +
  geom_segment(
    aes(
      y = id, yend = id,
      x = time_start, xend = time_start + time_sec,
      colour = time_sec
    ),
    size = 2
  ) +
  theme_bw() +
  scale_colour_gradientn("Duration", colours = rev(myPalette(100))) +
  scale_y_discrete("Users") +
  scale_x_time("Connexion time")

编辑:理解您的问题后,这里有一种简单的方法可以满足您的需求。

# Count how many sessions are open at each of 1000 evenly spaced instants
# between the first login and the last logout, then plot the count over time.

# Session boundaries as plain numeric seconds, computed once.
session_start <- as.numeric(data$time_start)
session_end <- session_start + data$time_sec

# Evaluation grid; kept as a time object so scale_x_time() can format it.
time <- seq(
  from = min(data$time_start),
  to = max(data$time_start + data$time_sec),
  length.out = 1000
)

# A user counts as logged in on the half-open interval [start, end): present
# from the very second of login, gone at the second of logout.
# vapply() guarantees an integer vector even for an empty grid, and the result
# no longer shadows base::sum().
n_users <- vapply(
  as.numeric(time),
  function(t) sum(session_start <= t & t < session_end),
  integer(1)
)

data2 <- data.frame(time = time, n_users = n_users)

ggplot(data2) +
  geom_line(aes(x = time, y = n_users)) +
  scale_x_time() +
  theme_bw()

data.table 中的 foverlaps() 是另一种方式。

# Sample login data used below: 17 sessions, one row per user.
xdf <- structure(
  list(
    # User identifiers "id_1" through "id_17".
    id = sprintf("id_%d", 1:17),
    # Login time as an hms/difftime value in seconds.
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846,
        30588, 31303, 37312, 30849, 38466, 30683,
        38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"),
      units = "secs"
    ),
    # Session length in seconds.
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L,
      1881L, 2295L, 3770L, 3350L, 3626L, 2525L,
      3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  # The list names above already provide the column names.
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

library(hrbrthemes) # devtools::install_git("https://gitlab.com/hrbrmstr/hrbrthemes")
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyr)

# Overview chart: one horizontal bar per user spanning login to logout.
# Rows are sorted by login time and the factor levels reversed so that the
# earliest login ends up at the top of the y axis.
xdf %>%
  arrange(time_start) %>%
  mutate(id = factor(id, levels = rev(id))) %>%
  ggplot(mapping = aes(x = time_start, y = id)) +
  geom_segment(
    mapping = aes(xend = time_start + time_sec, yend = id),
    color = ft_cols$slate,
    size = 1.5
  ) +
  labs(title = "Login/Usage Overlap Overview", x = NULL, y = NULL) +
  theme_ipsum_rc(grid = "X")

# Count, for every 5-minute interval, how many sessions overlap it, then plot
# the counts as a labelled line chart.

# Session boundaries as plain numeric seconds. Both columns are deliberately
# the same plain type: foverlaps() works on integer/numeric interval columns,
# so `end` must not stay an hms/difftime while `start` is numeric.
xdf$start <- as.numeric(xdf$time_start)
xdf$end <- xdf$start + as.numeric(xdf$time_sec)

# 5-minute bins aligned to clock boundaries (multiples of 300 s), covering
# the first login through the LAST LOGOUT. Building the grid from the login
# times alone would end the chart before the remaining users log off.
bin_width <- 300
first_bin <- floor(min(xdf$start) / bin_width) * bin_width
last_bin <- floor(max(xdf$end) / bin_width) * bin_width
bin_start <- seq(first_bin, last_bin, by = bin_width)

# foverlaps() needs the lookup table to be a data.table keyed on its
# interval columns; each bin is the closed interval [start, start + 299].
ranges <- data.table(start = bin_start, end = bin_start + bin_width - 1)
setkey(ranges, start, end)

xdf <- data.table(xdf)

# foverlaps() returns one row per (session, bin) pair that overlaps; counting
# rows per bin start gives the number of users logged in during that bin.
foverlaps(xdf, ranges) %>%
  as_tibble() %>% # back to a regular tibble for dplyr/tidyr
  count(start) %>%
  # bins that no session touches are absent from the join; add them as zeros
  complete(start = ranges$start, fill = list(n = 0L)) %>%
  ggplot(aes(start, n)) +
  geom_line() + # there are other ways to show this data, too
  geom_label(aes(label = n)) +
  labs(
    x = NULL, y = "# Users concurrently logged-in",
    title = "Active Users per-5-minute Interval"
  ) +
  theme_ipsum_rc()