R 中的用户登录时间图
Graph for user login time in r
所以我有三列,ID,用户登录时间,以及他登录了多长时间(以秒为单位)。这是它的一个示例(总用户超过 4000):
# Sample of the login data as produced by dput(): a 17-row tibble-classed
# data frame with three columns. The value is not assigned here.
#   id         - character user identifier
#   time_start - login time, stored as an hms/difftime value in seconds
#   time_sec   - integer number of seconds the user stayed logged in
structure(
list(
id = c(
"id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
"id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
),
time_start = structure(
c(
37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
38466, 30683, 38412, 30643, 29865, 30056, 31727
),
class = c("hms", "difftime"), units = "secs"
),
time_sec = c(
3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
)
),
.Names = c("id", "time_start", "time_sec"),
# compact row-name form: 17 automatic row names
row.names = c(NA, -17L),
class = c("tbl_df", "tbl", "data.frame")
)
我正在尝试创建一个图表来显示在任何给定时间有多少用户,间隔为 X(例如 5 分钟)。
比如在 9:00 有 X 个用户,
在 9:05 有 X 个用户。
关于在任何给定时间有多少连接的直方图或类似的东西。
我的主要问题是在用户注销时删除他们。
我觉得答案应该很明显,但我就是找不到,也不知道该搜索什么。
这是一个简单的方案:
library(ggplot2)
library(RColorBrewer)
# Palette generator: interpolates the 11-colour Brewer "Spectral" scheme to
# however many colours the caller asks for.
myPalette <- colorRampPalette(colors = brewer.pal(n = 11, name = "Spectral"))
# Sample login data, one row per session:
#   id         - user identifier
#   time_start - login time, an hms/difftime value in seconds
#   time_sec   - session length in seconds
data <- structure(
  list(
    id = c(
      "id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
      "id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
    ),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
        38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"), units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
      3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  .Names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Make id a factor whose levels follow the order of first appearance, so the
# plot keeps users in data order instead of sorting them alphabetically.
# unique() is needed because factor() rejects duplicated levels, which would
# happen as soon as one user has more than one session.
data$id <- factor(data$id, levels = unique(data$id))
# Gantt-style overview: one horizontal segment per row, running from
# time_start to time_start + time_sec and coloured by session length.
ggplot(data) +
  geom_segment(
    aes(
      x = time_start, xend = time_start + time_sec,
      y = id, yend = id,
      colour = time_sec
    ),
    size = 2
  ) +
  theme_bw() +
  scale_colour_gradientn(name = "Duration", colours = rev(myPalette(100))) +
  scale_y_discrete(name = "Users") +
  scale_x_time(name = "Connexion time")
编辑:理解您的问题后,这里有一种简单的方法可以满足您的需求。
# Count how many sessions are open at each point of a 1000-point time grid
# spanning first login to last logout, then draw the count as a line.

# Session bounds as plain numeric seconds, computed once outside the loop.
session_start <- as.numeric(data$time_start)
session_end <- session_start + as.numeric(data$time_sec)

# The grid and the counts get their own names so that base::sum() and
# stats::time() are not masked by data objects.
time_grid <- seq(
  from = min(data$time_start),
  to = max(data$time_start + data$time_sec),
  length.out = 1000
)

# A session is active on the half-open interval [start, end): the user counts
# from the login instant onwards and no longer counts at the logout instant.
# vapply() guarantees an integer vector whatever the input length.
n_active <- vapply(
  as.numeric(time_grid),
  function(now) sum(session_start <= now & now < session_end),
  integer(1)
)

data2 <- data.frame(time = time_grid, sum = n_active)

ggplot(data2) +
  geom_line(aes(x = time, y = sum)) +
  scale_x_time() +
  theme_bw()
使用 data.table 中的 foverlaps() 是另一种方式。
# Sample login data, one row per session: user id, login time (hms/difftime,
# in seconds) and session length in seconds.
xdf <- structure(
  list(
    id = paste0("id_", 1:17),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312,
        30849, 38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"),
      units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L,
      3350L, 3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)
library(hrbrthemes) # devtools::install_git("https://gitlab.com/hrbrmstr/hrbrthemes")
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyr)
# Overview chart: one horizontal bar per session, sorted by login time with
# the earliest login at the top.
xdf %>%
  arrange(time_start) %>%
  # Reversed order of appearance becomes the level order. unique() keeps
  # factor() from failing on duplicated levels when a user has several
  # sessions.
  mutate(id = factor(id, levels = rev(unique(id)))) %>%
  ggplot(aes(time_start, id)) +
  geom_segment(
    aes(xend = time_start + time_sec, yend = id),
    size = 1.5, color = ft_cols$slate
  ) +
  labs(
    x = NULL, y = NULL, title = "Login/Usage Overlap Overview"
  ) +
  theme_ipsum_rc(grid = "X")
# Get the range to bucket: from the first login to the last logout, widened
# to multiples of 10 seconds. Using login times alone would stop the buckets
# at the last login even though sessions are still open after it.
login_sec <- as.numeric(xdf$time_start)
logout_sec <- login_sec + as.numeric(xdf$time_sec)
rng <- c(
  floor(min(login_sec) / 10) * 10,
  ceiling(max(logout_sec) / 10) * 10
)

# 5-minute intervals: each bucket covers [start, start + 299] seconds.
# tibble() replaces the deprecated data_frame(); `end` refers to the `start`
# column defined just before it.
ranges <- tibble(
  start = seq(rng[1], rng[2], 300),
  end = start + 299
)
# foverlaps() works on data.tables, so convert temporarily and key the
# bucket table on its interval columns.
ranges <- data.table(ranges)
setkey(ranges, start, end)

# Create start/end for the original data. Both bounds are plain numeric
# seconds so the two interval columns share one type; adding to time_start
# directly would leave `end` as an hms/difftime value.
xdf$start <- as.numeric(xdf$time_start)
xdf$end <- xdf$start + as.numeric(xdf$time_sec)
xdf <- data.table(xdf)
# Overlap join: pair every session with each 5-minute bucket it touches, then
# count sessions per bucket and plot the counts.
# The interval columns and overlap type are spelled out rather than left to
# the defaults.
foverlaps(xdf, ranges, by.x = c("start", "end"), type = "any") %>%
  as_tibble() %>% # back to a tibble; as_data_frame() is deprecated
  count(start) %>%
  # buckets that no session touches are missing after count(); add them as 0
  complete(start = ranges$start, fill = list(n = 0)) %>%
  ggplot(aes(start, n)) +
  geom_line() + # there are other ways to show this data, too
  geom_label(aes(label = n)) +
  labs(
    x = NULL, y = "# Users concurrently logged-in",
    title = "Active Users per-5-minute Interval"
  ) +
  theme_ipsum_rc()
所以我有三列,ID,用户登录时间,以及他登录了多长时间(以秒为单位)。这是它的一个示例(总用户超过 4000):
# Sample of the login data as produced by dput(): a 17-row tibble-classed
# data frame with three columns. The value is not assigned here.
#   id         - character user identifier
#   time_start - login time, stored as an hms/difftime value in seconds
#   time_sec   - integer number of seconds the user stayed logged in
structure(
list(
id = c(
"id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
"id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
),
time_start = structure(
c(
37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
38466, 30683, 38412, 30643, 29865, 30056, 31727
),
class = c("hms", "difftime"), units = "secs"
),
time_sec = c(
3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
)
),
.Names = c("id", "time_start", "time_sec"),
# compact row-name form: 17 automatic row names
row.names = c(NA, -17L),
class = c("tbl_df", "tbl", "data.frame")
)
我正在尝试创建一个图表来显示在任何给定时间有多少用户,间隔为 X(例如 5 分钟)。
比如在 9:00 有 X 个用户,
在 9:05 有 X 个用户。
关于在任何给定时间有多少连接的直方图或类似的东西。
我的主要问题是在用户注销时删除他们。
我觉得答案应该很明显,但我就是找不到,也不知道该搜索什么。
这是一个简单的方案:
library(ggplot2)
library(RColorBrewer)
# Palette generator: interpolates the 11-colour Brewer "Spectral" scheme to
# however many colours the caller asks for.
myPalette <- colorRampPalette(colors = brewer.pal(n = 11, name = "Spectral"))
# Sample login data, one row per session:
#   id         - user identifier
#   time_start - login time, an hms/difftime value in seconds
#   time_sec   - session length in seconds
data <- structure(
  list(
    id = c(
      "id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
      "id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
    ),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
        38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"), units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
      3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  .Names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Make id a factor whose levels follow the order of first appearance, so the
# plot keeps users in data order instead of sorting them alphabetically.
# unique() is needed because factor() rejects duplicated levels, which would
# happen as soon as one user has more than one session.
data$id <- factor(data$id, levels = unique(data$id))
# Gantt-style overview: one horizontal segment per row, running from
# time_start to time_start + time_sec and coloured by session length.
ggplot(data) +
  geom_segment(
    aes(
      x = time_start, xend = time_start + time_sec,
      y = id, yend = id,
      colour = time_sec
    ),
    size = 2
  ) +
  theme_bw() +
  scale_colour_gradientn(name = "Duration", colours = rev(myPalette(100))) +
  scale_y_discrete(name = "Users") +
  scale_x_time(name = "Connexion time")
编辑:理解您的问题后,这里有一种简单的方法可以满足您的需求。
# Count how many sessions are open at each point of a 1000-point time grid
# spanning first login to last logout, then draw the count as a line.

# Session bounds as plain numeric seconds, computed once outside the loop.
session_start <- as.numeric(data$time_start)
session_end <- session_start + as.numeric(data$time_sec)

# The grid and the counts get their own names so that base::sum() and
# stats::time() are not masked by data objects.
time_grid <- seq(
  from = min(data$time_start),
  to = max(data$time_start + data$time_sec),
  length.out = 1000
)

# A session is active on the half-open interval [start, end): the user counts
# from the login instant onwards and no longer counts at the logout instant.
# vapply() guarantees an integer vector whatever the input length.
n_active <- vapply(
  as.numeric(time_grid),
  function(now) sum(session_start <= now & now < session_end),
  integer(1)
)

data2 <- data.frame(time = time_grid, sum = n_active)

ggplot(data2) +
  geom_line(aes(x = time, y = sum)) +
  scale_x_time() +
  theme_bw()
使用 data.table 中的 foverlaps() 是另一种方式。
# Sample login data, one row per session: user id, login time (hms/difftime,
# in seconds) and session length in seconds.
xdf <- structure(
  list(
    id = paste0("id_", 1:17),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312,
        30849, 38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"),
      units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L,
      3350L, 3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)
library(hrbrthemes) # devtools::install_git("https://gitlab.com/hrbrmstr/hrbrthemes")
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyr)
# Overview chart: one horizontal bar per session, sorted by login time with
# the earliest login at the top.
xdf %>%
  arrange(time_start) %>%
  # Reversed order of appearance becomes the level order. unique() keeps
  # factor() from failing on duplicated levels when a user has several
  # sessions.
  mutate(id = factor(id, levels = rev(unique(id)))) %>%
  ggplot(aes(time_start, id)) +
  geom_segment(
    aes(xend = time_start + time_sec, yend = id),
    size = 1.5, color = ft_cols$slate
  ) +
  labs(
    x = NULL, y = NULL, title = "Login/Usage Overlap Overview"
  ) +
  theme_ipsum_rc(grid = "X")
# Get the range to bucket: from the first login to the last logout, widened
# to multiples of 10 seconds. Using login times alone would stop the buckets
# at the last login even though sessions are still open after it.
login_sec <- as.numeric(xdf$time_start)
logout_sec <- login_sec + as.numeric(xdf$time_sec)
rng <- c(
  floor(min(login_sec) / 10) * 10,
  ceiling(max(logout_sec) / 10) * 10
)

# 5-minute intervals: each bucket covers [start, start + 299] seconds.
# tibble() replaces the deprecated data_frame(); `end` refers to the `start`
# column defined just before it.
ranges <- tibble(
  start = seq(rng[1], rng[2], 300),
  end = start + 299
)
# foverlaps() works on data.tables, so convert temporarily and key the
# bucket table on its interval columns.
ranges <- data.table(ranges)
setkey(ranges, start, end)

# Create start/end for the original data. Both bounds are plain numeric
# seconds so the two interval columns share one type; adding to time_start
# directly would leave `end` as an hms/difftime value.
xdf$start <- as.numeric(xdf$time_start)
xdf$end <- xdf$start + as.numeric(xdf$time_sec)
xdf <- data.table(xdf)
# Overlap join: pair every session with each 5-minute bucket it touches, then
# count sessions per bucket and plot the counts.
# The interval columns and overlap type are spelled out rather than left to
# the defaults.
foverlaps(xdf, ranges, by.x = c("start", "end"), type = "any") %>%
  as_tibble() %>% # back to a tibble; as_data_frame() is deprecated
  count(start) %>%
  # buckets that no session touches are missing after count(); add them as 0
  complete(start = ranges$start, fill = list(n = 0)) %>%
  ggplot(aes(start, n)) +
  geom_line() + # there are other ways to show this data, too
  geom_label(aes(label = n)) +
  labs(
    x = NULL, y = "# Users concurrently logged-in",
    title = "Active Users per-5-minute Interval"
  ) +
  theme_ipsum_rc()