识别时间使用数据中的开始和结束
Identifying start and end times in time-use data
我正在处理时间使用数据,并希望计算每个 ID 从每个步骤开始时起测量的持续时间。我曾考虑使用 `colSums` 函数,但如果某个 id 的活动是零散的、有多个开始时间,我不确定该如何处理。
id 1 和 10 的输出示例:
id start-end duration
1 04:00-06:20 15
10 04:00-04:10 2
10 04:50-06:20 10
示例数据:
# Sample data in dput() form: 14 rows (id 1-14) and 15 columns named after
# ten-minute slots from 04:00 to 06:20. Every cell is 11 except for ids 2, 8
# and 9, which are 0 in the 06:00, 06:10 and 06:20 columns.
# The `spec` attribute and the "spec_tbl_df" class are carried along from the
# original object (presumably one read with readr -- confirm).
structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14), `04:00` = c(11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11), `04:10` = c(11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11), `04:20` = c(11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11), `04:30` = c(11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:40` = c(11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:00` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:10` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:20` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:30` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:40` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `06:00` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:10` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:20` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11)), row.names = c(NA,
-14L), spec = structure(list(cols = list(id = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `04:10` = structure(list(), class = c("collector_double",
"collector")), `04:20` = structure(list(), class = c("collector_double",
"collector")), `04:30` = structure(list(), class = c("collector_double",
"collector")), `04:40` = structure(list(), class = c("collector_double",
"collector")), `04:50` = structure(list(), class = c("collector_double",
"collector")), `05:00` = structure(list(), class = c("collector_double",
"collector")), `05:10` = structure(list(), class = c("collector_double",
"collector")), `05:20` = structure(list(), class = c("collector_double",
"collector")), `05:30` = structure(list(), class = c("collector_double",
"collector")), `05:40` = structure(list(), class = c("collector_double",
"collector")), `05:50` = structure(list(), class = c("collector_double",
"collector")), `06:00` = structure(list(), class = c("collector_double",
"collector")), `06:10` = structure(list(), class = c("collector_double",
"collector")), `06:20` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
假设 `df` 是提供的数据集:
library(tidyverse)
# Collapse each id's consecutive non-zero time slots into one row per run,
# reporting the run's first and last slot label and its number of slots.
df %>%
  # Long format: one row per (id, time slot); `name` holds the slot label
  # and `value` the cell content.
  pivot_longer(cols = -id) %>%
  # Number the runs separately within each id. Counting zeros per id (rather
  # than adding a global zero count to `id`) keeps the key unique even when
  # ids are unsorted or not numeric.
  group_by(id) %>%
  mutate(id_interval = cumsum(value == 0)) %>%
  # Zero slots only separate runs; they are not part of any interval.
  filter(value != 0) %>%
  group_by(id, id_interval) %>%
  summarise(
    `start-end` = str_c(head(name, 1), tail(name, 1), sep = "-"),
    duration = n(),
    # Return an ungrouped tibble, as a single-key summarise would.
    .groups = "drop"
  ) %>%
  select(-id_interval)
这会产生
# A tibble: 14 × 3
id `start-end` duration
<dbl> <chr> <int>
1 1 04:00-06:20 15
2 2 04:00-05:50 12
3 3 04:00-06:20 15
4 4 04:00-06:20 15
5 5 04:00-06:20 15
6 6 04:00-06:20 15
7 7 04:00-06:20 15
8 8 04:00-05:50 12
9 9 04:00-05:50 12
10 10 04:00-06:20 15
11 11 04:00-06:20 15
12 12 04:00-06:20 15
13 13 04:00-06:20 15
14 14 04:00-06:20 15
请注意,提供的 `dput` 输入与上图不符。
它看起来更像这样:
使用 data.table 的解决方案:
library(data.table)
# Convert `dt` to a data.table by reference.
setDT(dt)
# The sample data does not contain a gap inside a row the way the screenshot
# does, so add one: zero out columns 4-6 of the second row.
# set() assigns in place; a base-style `[<-` assignment would copy the table.
set(dt, i = 2L, j = 4:6, value = 0)
# Long format: one row per (id, time slot); `variable` holds the slot label.
dl <- melt(dt, id.vars = "id")
setorder(dl, id, variable)
# One row per run of identical values within each id. The third grouping
# column comes out named `value` and flags whether the run is non-zero;
# only those runs are kept in the final step.
dl[
  ,
  .(
    `start-end` = paste0(first(variable), "-", last(variable)),
    duration = .N
  ),
  by = .(id, rleid(value), value > 0)
][value == TRUE, .(id, `start-end`, duration)]
输出
# id start-end duration
# 1: 1 04:00-06:20 15
# 2: 2 04:00-04:10 2
# 3: 2 04:50-05:50 7
# 4: 3 04:00-06:20 15
# 5: 4 04:00-06:20 15
# 6: 5 04:00-06:20 15
# 7: 6 04:00-06:20 15
# 8: 7 04:00-06:20 15
# 9: 8 04:00-05:50 12
# 10: 9 04:00-05:50 12
# 11: 10 04:00-06:20 15
# 12: 11 04:00-06:20 15
# 13: 12 04:00-06:20 15
# 14: 13 04:00-06:20 15
# 15: 14 04:00-06:20 15
数据
# Sample data in dput() form, bound to `dt`: 14 rows (id 1-14) and 15 columns
# named after ten-minute slots from 04:00 to 06:20. Every cell is 11 except
# for ids 2, 8 and 9, which are 0 in the 06:00, 06:10 and 06:20 columns.
# The `spec` attribute and the "spec_tbl_df" class are carried along from the
# original object (presumably one read with readr -- confirm).
dt <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14), `04:00` = c(11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11), `04:10` = c(11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11), `04:20` = c(11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11), `04:30` = c(11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:40` = c(11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:00` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:10` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:20` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:30` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:40` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `06:00` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:10` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:20` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11)), row.names = c(NA,
-14L), spec = structure(list(cols = list(id = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `04:10` = structure(list(), class = c("collector_double",
"collector")), `04:20` = structure(list(), class = c("collector_double",
"collector")), `04:30` = structure(list(), class = c("collector_double",
"collector")), `04:40` = structure(list(), class = c("collector_double",
"collector")), `04:50` = structure(list(), class = c("collector_double",
"collector")), `05:00` = structure(list(), class = c("collector_double",
"collector")), `05:10` = structure(list(), class = c("collector_double",
"collector")), `05:20` = structure(list(), class = c("collector_double",
"collector")), `05:30` = structure(list(), class = c("collector_double",
"collector")), `05:40` = structure(list(), class = c("collector_double",
"collector")), `05:50` = structure(list(), class = c("collector_double",
"collector")), `06:00` = structure(list(), class = c("collector_double",
"collector")), `06:10` = structure(list(), class = c("collector_double",
"collector")), `06:20` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
我正在处理时间使用数据,并希望计算每个 ID 从每个步骤开始时起测量的持续时间。我曾考虑使用 `colSums` 函数,但如果某个 id 的活动是零散的、有多个开始时间,我不确定该如何处理。
id 1 和 10 的输出示例:
id start-end duration
1 04:00-06:20 15
10 04:00-04:10 2
10 04:50-06:20 10
示例数据:
# Sample data in dput() form: 14 rows (id 1-14) and 15 columns named after
# ten-minute slots from 04:00 to 06:20. Every cell is 11 except for ids 2, 8
# and 9, which are 0 in the 06:00, 06:10 and 06:20 columns.
# The `spec` attribute and the "spec_tbl_df" class are carried along from the
# original object (presumably one read with readr -- confirm).
structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14), `04:00` = c(11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11), `04:10` = c(11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11), `04:20` = c(11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11), `04:30` = c(11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:40` = c(11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:00` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:10` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:20` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:30` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:40` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `06:00` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:10` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:20` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11)), row.names = c(NA,
-14L), spec = structure(list(cols = list(id = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `04:10` = structure(list(), class = c("collector_double",
"collector")), `04:20` = structure(list(), class = c("collector_double",
"collector")), `04:30` = structure(list(), class = c("collector_double",
"collector")), `04:40` = structure(list(), class = c("collector_double",
"collector")), `04:50` = structure(list(), class = c("collector_double",
"collector")), `05:00` = structure(list(), class = c("collector_double",
"collector")), `05:10` = structure(list(), class = c("collector_double",
"collector")), `05:20` = structure(list(), class = c("collector_double",
"collector")), `05:30` = structure(list(), class = c("collector_double",
"collector")), `05:40` = structure(list(), class = c("collector_double",
"collector")), `05:50` = structure(list(), class = c("collector_double",
"collector")), `06:00` = structure(list(), class = c("collector_double",
"collector")), `06:10` = structure(list(), class = c("collector_double",
"collector")), `06:20` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
假设 `df` 是提供的数据集:
library(tidyverse)
# Collapse each id's consecutive non-zero time slots into one row per run,
# reporting the run's first and last slot label and its number of slots.
df %>%
  # Long format: one row per (id, time slot); `name` holds the slot label
  # and `value` the cell content.
  pivot_longer(cols = -id) %>%
  # Number the runs separately within each id. Counting zeros per id (rather
  # than adding a global zero count to `id`) keeps the key unique even when
  # ids are unsorted or not numeric.
  group_by(id) %>%
  mutate(id_interval = cumsum(value == 0)) %>%
  # Zero slots only separate runs; they are not part of any interval.
  filter(value != 0) %>%
  group_by(id, id_interval) %>%
  summarise(
    `start-end` = str_c(head(name, 1), tail(name, 1), sep = "-"),
    duration = n(),
    # Return an ungrouped tibble, as a single-key summarise would.
    .groups = "drop"
  ) %>%
  select(-id_interval)
这会产生
# A tibble: 14 × 3
id `start-end` duration
<dbl> <chr> <int>
1 1 04:00-06:20 15
2 2 04:00-05:50 12
3 3 04:00-06:20 15
4 4 04:00-06:20 15
5 5 04:00-06:20 15
6 6 04:00-06:20 15
7 7 04:00-06:20 15
8 8 04:00-05:50 12
9 9 04:00-05:50 12
10 10 04:00-06:20 15
11 11 04:00-06:20 15
12 12 04:00-06:20 15
13 13 04:00-06:20 15
14 14 04:00-06:20 15
请注意,提供的 `dput` 输入与上图不符。
它看起来更像这样:
使用 data.table 的解决方案:
library(data.table)
# Convert `dt` to a data.table by reference.
setDT(dt)
# The sample data does not contain a gap inside a row the way the screenshot
# does, so add one: zero out columns 4-6 of the second row.
# set() assigns in place; a base-style `[<-` assignment would copy the table.
set(dt, i = 2L, j = 4:6, value = 0)
# Long format: one row per (id, time slot); `variable` holds the slot label.
dl <- melt(dt, id.vars = "id")
setorder(dl, id, variable)
# One row per run of identical values within each id. The third grouping
# column comes out named `value` and flags whether the run is non-zero;
# only those runs are kept in the final step.
dl[
  ,
  .(
    `start-end` = paste0(first(variable), "-", last(variable)),
    duration = .N
  ),
  by = .(id, rleid(value), value > 0)
][value == TRUE, .(id, `start-end`, duration)]
输出
# id start-end duration
# 1: 1 04:00-06:20 15
# 2: 2 04:00-04:10 2
# 3: 2 04:50-05:50 7
# 4: 3 04:00-06:20 15
# 5: 4 04:00-06:20 15
# 6: 5 04:00-06:20 15
# 7: 6 04:00-06:20 15
# 8: 7 04:00-06:20 15
# 9: 8 04:00-05:50 12
# 10: 9 04:00-05:50 12
# 11: 10 04:00-06:20 15
# 12: 11 04:00-06:20 15
# 13: 12 04:00-06:20 15
# 14: 13 04:00-06:20 15
# 15: 14 04:00-06:20 15
数据
# Sample data in dput() form, bound to `dt`: 14 rows (id 1-14) and 15 columns
# named after ten-minute slots from 04:00 to 06:20. Every cell is 11 except
# for ids 2, 8 and 9, which are 0 in the 06:00, 06:10 and 06:20 columns.
# The `spec` attribute and the "spec_tbl_df" class are carried along from the
# original object (presumably one read with readr -- confirm).
dt <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14), `04:00` = c(11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11), `04:10` = c(11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11), `04:20` = c(11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11), `04:30` = c(11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:40` = c(11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `04:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:00` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:10` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:20` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:30` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:40` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `05:50` = c(11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11), `06:00` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:10` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11), `06:20` = c(11,
0, 11, 11, 11, 11, 11, 0, 0, 11, 11, 11, 11, 11)), row.names = c(NA,
-14L), spec = structure(list(cols = list(id = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `04:10` = structure(list(), class = c("collector_double",
"collector")), `04:20` = structure(list(), class = c("collector_double",
"collector")), `04:30` = structure(list(), class = c("collector_double",
"collector")), `04:40` = structure(list(), class = c("collector_double",
"collector")), `04:50` = structure(list(), class = c("collector_double",
"collector")), `05:00` = structure(list(), class = c("collector_double",
"collector")), `05:10` = structure(list(), class = c("collector_double",
"collector")), `05:20` = structure(list(), class = c("collector_double",
"collector")), `05:30` = structure(list(), class = c("collector_double",
"collector")), `05:40` = structure(list(), class = c("collector_double",
"collector")), `05:50` = structure(list(), class = c("collector_double",
"collector")), `06:00` = structure(list(), class = c("collector_double",
"collector")), `06:10` = structure(list(), class = c("collector_double",
"collector")), `06:20` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))