第 until/after 行最后偏离递增顺序(最后一个升序循环)

Rows until/after last departure from increasing order (last ascending cycle)

我有类似于带有时间戳的多个时间序列的数据。它由 group 组织,并且有一个循环组件,其中 time 在一些周期内增加,由这种增加模式的突然变化(即减少)界定。

我只想保留最后一次上升趋势变化(最后一个上升周期)之前或之后的数据(行)。

一些合成的最小数据:

df <- 
  data.frame(group = c(rep("A", 10), rep("B", 10), rep("C", 10)),
             time = c(c(1:3, 2, 3:6, 5, 6), c(1:2, 1, 3, 7, 6:10), c(4, 3, 6, 4, 6, 7, 6, 8:10))         
  )

上升趋势的最后变化是什么意思:

  library(dplyr)
  
# Just exemplying last change in monotonic increasing trend
df %>%
  dplyr::group_by(group) %>%
  dplyr::mutate(
    row_num = dplyr::row_number(),
    time_order = dplyr::case_when(time - dplyr::lag(time, n = 1) >= 0 ~ "increase",  
                                  time - dplyr::lag(time, n = 1) < 0 ~ "decrease",
                                  TRUE ~ "increase"),
    where_split = dplyr::if_else(dplyr::last(which(time_order == "decrease")) == row_num, "here", NA_character_)
  ) %>%
    print(n = Inf)
#> # A tibble: 30 x 5
#> # Groups:   group [3]
#>    group  time row_num time_order where_split
#>    <chr> <dbl>   <int> <chr>      <chr>      
#>  1 A         1       1 increase   <NA>       
#>  2 A         2       2 increase   <NA>       
#>  3 A         3       3 increase   <NA>       
#>  4 A         2       4 decrease   <NA>       
#>  5 A         3       5 increase   <NA>       
#>  6 A         4       6 increase   <NA>       
#>  7 A         5       7 increase   <NA>       
#>  8 A         6       8 increase   <NA>       
#>  9 A         5       9 decrease   here       
#> 10 A         6      10 increase   <NA>       
#> 11 B         1       1 increase   <NA>       
#> 12 B         2       2 increase   <NA>       
#> 13 B         1       3 decrease   <NA>       
#> 14 B         3       4 increase   <NA>       
#> 15 B         7       5 increase   <NA>       
#> 16 B         6       6 decrease   here       
#> 17 B         7       7 increase   <NA>       
#> 18 B         8       8 increase   <NA>       
#> 19 B         9       9 increase   <NA>       
#> 20 B        10      10 increase   <NA>       
#> 21 C         4       1 increase   <NA>       
#> 22 C         3       2 decrease   <NA>       
#> 23 C         6       3 increase   <NA>       
#> 24 C         4       4 decrease   <NA>       
#> 25 C         6       5 increase   <NA>       
#> 26 C         7       6 increase   <NA>       
#> 27 C         6       7 decrease   here       
#> 28 C         8       8 increase   <NA>       
#> 29 C         9       9 increase   <NA>       
#> 30 C        10      10 increase   <NA>

reprex package (v2.0.1)

于 2022-05-17 创建

为了便于验证,我给出我的解决方案:

# All rows until last change in trend, by group
check_until <- 
  df %>%
  dplyr::group_by(group) %>%
  dplyr::mutate(
    row_num = dplyr::row_number(),
    time_order = dplyr::case_when(time - dplyr::lag(time, n = 1) >= 0 ~ "increase",  
                                  time - dplyr::lag(time, n = 1) < 0 ~ "decrease",
                                  TRUE ~ "increase")) %>%
      dplyr::slice(1:dplyr::last(which(time_order == "decrease"))) %>%
    dplyr::select(-c(row_num, time_order))
# All rows after last change in trend, by group
check_after <- 
  df %>%
  group_by(group) %>%
  dplyr::mutate(
    row_num = dplyr::row_number(),
    time_order = dplyr::case_when(time - lag(time, n = 1) >= 0 ~ "increase",  
                                  time - lag(time, n = 1) < 0 ~ "decrease",
                                  TRUE ~ "increase")) %>%
  dplyr::slice(dplyr::last(which(time_order == "decrease")):max(row_num)) %>%
  dplyr::select(-c(row_num, time_order)) 

我的解决方案有效,但它们似乎过于冗长且效率低下。我相信还有更优雅的解决方案。欢迎任何见解,我也愿意接受 datatable 解决方案。

这两种情况都可以通过cumsum + diff + slice(_max)实现。

(1) 所有行直到最后一次趋势变化:

df %>%
  group_by(group) %>%
  slice(1:which.max(cumsum(c(1, diff(time) < 0)))) %>%
  ungroup()

# # A tibble: 22 × 2
#    group  time
#    <chr> <dbl>
#  1 A         1
#  2 A         2
#  3 A         3
#  4 A         2
#  5 A         3
#  6 A         4
#  7 A         5
#  8 A         6
#  9 A         5
# 10 B         1
# 11 B         2
# 12 B         1
# 13 B         3
# 14 B         7
# 15 B         6
# 16 C         4
# 17 C         3
# 18 C         6
# 19 C         4
# 20 C         6
# 21 C         7
# 22 C         6

(2) 最后一次趋势变化后的所有行:

df %>%
  group_by(group) %>%
  slice_max(cumsum(c(1, diff(time) < 0))) %>%
  ungroup()

# A tibble: 11 × 2
#    group  time
#    <chr> <dbl>
#  1 A         5
#  2 A         6
#  3 B         6
#  4 B         7
#  5 B         8
#  6 B         9
#  7 B        10
#  8 C         6
#  9 C         8
# 10 C         9
# 11 C        10