识别每列的序列
Identifying sequences per column
我正在处理时间使用数据,并希望计算每个时间步长(每列)开始测量的持续时间和 select 每次测量的最长持续时间。测量编号从 1
到 27
。长度用 1 (e.g increment is set to 1).
加权 我不确定如果测量是零散的并且有多个持续时间,我不确定如何处理。
数据格式:
期望的输出(测量编号 1 的示例):
Time Measurement Duration
04:00 1 1
04:10 1 1
04:20 1 2
04:20 1 2
04:20 1 2
持续时间最长
Time Measurement Duration
04:20 1 2
示例数据:
df<-structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14), `04:00` = c(1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11), `04:10` = c(1, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11), `04:20` = c(1, 11, 1, 1, 11, 11, 11, 11, 11,
1, 1, 11, 11, 11), `04:30` = c(1, 11, 1, 1, 3, 11, 11, 11, 11,
1, 1, 13, 11, 11), `04:40` = c(1, 11, 1, 1, 3, 12, 11, 11, 4,
1, 1, 13, 4, 11), `04:50` = c(4, 11, 11, 11, 3, 12, 11, 11, 4,
11, 11, 13, 4, 11), `05:00` = c(4, 11, 11, 11, 3, 12, 11, 11,
4, 13, 11, 13, 4, 11), `05:10` = c(4, 11, 11, 11, 3, 12, 11,
11, 4, 13, 11, 13, 4, 11), `05:20` = c(4, 11, 11, 11, 11, 13,
4, 11, 4, 13, 11, 13, 4, 11), `05:30` = c(4, 11, 11, 11, 11,
13, 4, 13, 4, 13, 11, 1, 4, 13), `05:40` = c(4, 11, 3, 11, 11,
13, 4, 13, 11, 13, 11, 1, 1, 13), `05:50` = c(11, 11, 3, 11,
11, 13, 4, 13, 11, 13, 11, 1, 11, 13), `06:00` = c(11, 1, 3,
11, 11, 13, 4, 13, 1, 11, 11, 11, 11, 13), `06:10` = c(11, 1,
3, 11, 11, 13, 4, 13, 1, 11, 11, 11, 11, 13), `06:20` = c(11,
1, 3, 11, 11, 11, 11, 13, 1, 11, 11, 11, 11, 13)), row.names = c(NA,
-14L), spec = structure(list(cols = list(id = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `04:10` = structure(list(), class = c("collector_double",
"collector")), `04:20` = structure(list(), class = c("collector_double",
"collector")), `04:30` = structure(list(), class = c("collector_double",
"collector")), `04:40` = structure(list(), class = c("collector_double",
"collector")), `04:50` = structure(list(), class = c("collector_double",
"collector")), `05:00` = structure(list(), class = c("collector_double",
"collector")), `05:10` = structure(list(), class = c("collector_double",
"collector")), `05:20` = structure(list(), class = c("collector_double",
"collector")), `05:30` = structure(list(), class = c("collector_double",
"collector")), `05:40` = structure(list(), class = c("collector_double",
"collector")), `05:50` = structure(list(), class = c("collector_double",
"collector")), `06:00` = structure(list(), class = c("collector_double",
"collector")), `06:10` = structure(list(), class = c("collector_double",
"collector")), `06:20` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(lubridate)
df %>%
pivot_longer(-id, names_to = "timepoint", values_to = "Measurement") %>%
arrange(id, Measurement) %>%
type_convert() %>%
group_by(id) %>%
# Duration to first time point for each id
mutate(Duration = timepoint - min(timepoint)) %>%
# get the longest duration
filter(Duration == max(Duration))
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> timepoint = col_time(format = "")
#> )
#> # A tibble: 14 × 4
#> # Groups: id [14]
#> id timepoint Measurement Duration
#> <dbl> <time> <dbl> <drtn>
#> 1 1 06:20 11 8400 secs
#> 2 2 06:20 1 8400 secs
#> 3 3 06:20 3 8400 secs
#> 4 4 06:20 11 8400 secs
#> 5 5 06:20 11 8400 secs
#> 6 6 06:20 11 8400 secs
#> 7 7 06:20 11 8400 secs
#> 8 8 06:20 13 8400 secs
#> 9 9 06:20 1 8400 secs
#> 10 10 06:20 11 8400 secs
#> 11 11 06:20 11 8400 secs
#> 12 12 06:20 11 8400 secs
#> 13 13 06:20 11 8400 secs
#> 14 14 06:20 13 8400 secs
由 reprex package (v2.0.0)
于 2022-05-16 创建
这是一个主要使用 rle
的函数,它将为您提供特定测量所需的输出:
f <- function(n){
l <- lapply(df[-1], \(x) with(rle(x), lengths[values == n]))
enframe(l, name = "Time", value = "Duration") %>%
unnest("Duration") %>%
mutate(Measurement = n, .before = "Duration")
}
输出
> f(1)
# A tibble: 20 × 3
Time Measurement Duration
<chr> <dbl> <int>
1 04:00 1 1
2 04:10 1 1
3 04:20 1 1
4 04:20 1 2
5 04:20 1 2
6 04:30 1 1
7 04:30 1 2
8 04:30 1 2
9 04:40 1 1
10 04:40 1 2
11 04:40 1 2
12 05:30 1 1
13 05:40 1 2
14 05:50 1 1
15 06:00 1 1
16 06:00 1 1
17 06:10 1 1
18 06:10 1 1
19 06:20 1 1
20 06:20 1 1
用slice_max
得到最大值:
f(1) %>%
slice_max(Duration, n = 1, with_ties = F)
# A tibble: 1 × 3
Time Measurement Duration
<chr> <dbl> <int>
1 04:20 1 2
我正在处理时间使用数据,并希望计算每个时间步长(每列)开始测量的持续时间和 select 每次测量的最长持续时间。测量编号从 1
到 27
。长度用 1 (e.g increment is set to 1).
加权 我不确定如果测量是零散的并且有多个持续时间,我不确定如何处理。
数据格式:
期望的输出(测量编号 1 的示例):
Time Measurement Duration
04:00 1 1
04:10 1 1
04:20 1 2
04:20 1 2
04:20 1 2
持续时间最长
Time Measurement Duration
04:20 1 2
示例数据:
df<-structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14), `04:00` = c(1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11), `04:10` = c(1, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11), `04:20` = c(1, 11, 1, 1, 11, 11, 11, 11, 11,
1, 1, 11, 11, 11), `04:30` = c(1, 11, 1, 1, 3, 11, 11, 11, 11,
1, 1, 13, 11, 11), `04:40` = c(1, 11, 1, 1, 3, 12, 11, 11, 4,
1, 1, 13, 4, 11), `04:50` = c(4, 11, 11, 11, 3, 12, 11, 11, 4,
11, 11, 13, 4, 11), `05:00` = c(4, 11, 11, 11, 3, 12, 11, 11,
4, 13, 11, 13, 4, 11), `05:10` = c(4, 11, 11, 11, 3, 12, 11,
11, 4, 13, 11, 13, 4, 11), `05:20` = c(4, 11, 11, 11, 11, 13,
4, 11, 4, 13, 11, 13, 4, 11), `05:30` = c(4, 11, 11, 11, 11,
13, 4, 13, 4, 13, 11, 1, 4, 13), `05:40` = c(4, 11, 3, 11, 11,
13, 4, 13, 11, 13, 11, 1, 1, 13), `05:50` = c(11, 11, 3, 11,
11, 13, 4, 13, 11, 13, 11, 1, 11, 13), `06:00` = c(11, 1, 3,
11, 11, 13, 4, 13, 1, 11, 11, 11, 11, 13), `06:10` = c(11, 1,
3, 11, 11, 13, 4, 13, 1, 11, 11, 11, 11, 13), `06:20` = c(11,
1, 3, 11, 11, 11, 11, 13, 1, 11, 11, 11, 11, 13)), row.names = c(NA,
-14L), spec = structure(list(cols = list(id = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `04:10` = structure(list(), class = c("collector_double",
"collector")), `04:20` = structure(list(), class = c("collector_double",
"collector")), `04:30` = structure(list(), class = c("collector_double",
"collector")), `04:40` = structure(list(), class = c("collector_double",
"collector")), `04:50` = structure(list(), class = c("collector_double",
"collector")), `05:00` = structure(list(), class = c("collector_double",
"collector")), `05:10` = structure(list(), class = c("collector_double",
"collector")), `05:20` = structure(list(), class = c("collector_double",
"collector")), `05:30` = structure(list(), class = c("collector_double",
"collector")), `05:40` = structure(list(), class = c("collector_double",
"collector")), `05:50` = structure(list(), class = c("collector_double",
"collector")), `06:00` = structure(list(), class = c("collector_double",
"collector")), `06:10` = structure(list(), class = c("collector_double",
"collector")), `06:20` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(lubridate)
df %>%
pivot_longer(-id, names_to = "timepoint", values_to = "Measurement") %>%
arrange(id, Measurement) %>%
type_convert() %>%
group_by(id) %>%
# Duration to first time point for each id
mutate(Duration = timepoint - min(timepoint)) %>%
# get the longest duration
filter(Duration == max(Duration))
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> timepoint = col_time(format = "")
#> )
#> # A tibble: 14 × 4
#> # Groups: id [14]
#> id timepoint Measurement Duration
#> <dbl> <time> <dbl> <drtn>
#> 1 1 06:20 11 8400 secs
#> 2 2 06:20 1 8400 secs
#> 3 3 06:20 3 8400 secs
#> 4 4 06:20 11 8400 secs
#> 5 5 06:20 11 8400 secs
#> 6 6 06:20 11 8400 secs
#> 7 7 06:20 11 8400 secs
#> 8 8 06:20 13 8400 secs
#> 9 9 06:20 1 8400 secs
#> 10 10 06:20 11 8400 secs
#> 11 11 06:20 11 8400 secs
#> 12 12 06:20 11 8400 secs
#> 13 13 06:20 11 8400 secs
#> 14 14 06:20 13 8400 secs
由 reprex package (v2.0.0)
于 2022-05-16 创建这是一个主要使用 rle
的函数,它将为您提供特定测量所需的输出:
f <- function(n){
l <- lapply(df[-1], \(x) with(rle(x), lengths[values == n]))
enframe(l, name = "Time", value = "Duration") %>%
unnest("Duration") %>%
mutate(Measurement = n, .before = "Duration")
}
输出
> f(1)
# A tibble: 20 × 3
Time Measurement Duration
<chr> <dbl> <int>
1 04:00 1 1
2 04:10 1 1
3 04:20 1 1
4 04:20 1 2
5 04:20 1 2
6 04:30 1 1
7 04:30 1 2
8 04:30 1 2
9 04:40 1 1
10 04:40 1 2
11 04:40 1 2
12 05:30 1 1
13 05:40 1 2
14 05:50 1 1
15 06:00 1 1
16 06:00 1 1
17 06:10 1 1
18 06:10 1 1
19 06:20 1 1
20 06:20 1 1
用slice_max
得到最大值:
f(1) %>%
slice_max(Duration, n = 1, with_ties = F)
# A tibble: 1 × 3
Time Measurement Duration
<chr> <dbl> <int>
1 04:20 1 2