在 R 数据框中，按另一个变量分组，计算连续重复的非 NA 值的最大次数

Count maximum consecutive repeated non-NA values grouped by another variable in dataframe R

我想确定每个 ADM2_PCODE 中，非 NA 的 Valor 值连续重复出现的最大次数。因此，思路是：按 ADM2_PCODE 分组，过滤掉 NA 值，计算每个 Valor 值连续出现的最高次数，然后从中选出最大值。

下面的示例数据框:

# Example data: six yearly observations (1981-1986) for each of three
# ADM2_PCODE units. Valor is NA where no value is available.
# Valor for ADM2_PCODE 1100017 is 10, 20, 30, 40, 50, 60 so that this
# definition reproduces the tibble printed under "输入" below.
df <- structure(
  list(
    Year = rep(c(1981, 1982, 1983, 1984, 1985, 1986), times = 3),
    ADM2_PCODE = rep(c(1100015, 1100016, 1100017), each = 6),
    Valor = c(
      NA, NA, 30, 30, NA, NA,
      90, 10, 90, 10, 10, 10,
      10, 20, 30, 40, 50, 60
    ),
    # Truncated geometry strings; only row 7 and rows 13-18 use -63.0495.
    geometry = c(
      rep("MULTIPOLYGON (((-62.0495 -1...", times = 6),
      "MULTIPOLYGON (((-63.0495 -1...",
      rep("MULTIPOLYGON (((-62.0495 -1...", times = 5),
      rep("MULTIPOLYGON (((-63.0495 -1...", times = 6)
    )
  ),
  # Compact row names for 18 rows, as produced by dput() on a tibble.
  row.names = c(NA, -18L),
  class = c("tbl_df", "tbl", "data.frame")
)

输入:

 df
# A tibble: 18 x 4
    Year ADM2_PCODE Valor geometry                      
   <dbl>      <dbl> <dbl> <chr>                         
 1  1981    1100015    NA MULTIPOLYGON (((-62.0495 -1...
 2  1982    1100015    NA MULTIPOLYGON (((-62.0495 -1...
 3  1983    1100015    30 MULTIPOLYGON (((-62.0495 -1...
 4  1984    1100015    30 MULTIPOLYGON (((-62.0495 -1...
 5  1985    1100015    NA MULTIPOLYGON (((-62.0495 -1...
 6  1986    1100015    NA MULTIPOLYGON (((-62.0495 -1...
 7  1981    1100016    90 MULTIPOLYGON (((-63.0495 -1...
 8  1982    1100016    10 MULTIPOLYGON (((-62.0495 -1...
 9  1983    1100016    90 MULTIPOLYGON (((-62.0495 -1...
10  1984    1100016    10 MULTIPOLYGON (((-62.0495 -1...
11  1985    1100016    10 MULTIPOLYGON (((-62.0495 -1...
12  1986    1100016    10 MULTIPOLYGON (((-62.0495 -1...
13  1981    1100017    10 MULTIPOLYGON (((-63.0495 -1...
14  1982    1100017    20 MULTIPOLYGON (((-63.0495 -1...
15  1983    1100017    30 MULTIPOLYGON (((-63.0495 -1...
16  1984    1100017    40 MULTIPOLYGON (((-63.0495 -1...
17  1985    1100017    50 MULTIPOLYGON (((-63.0495 -1...
18  1986    1100017    60 MULTIPOLYGON (((-63.0495 -1...

预期输出:

ADM2_PCODE max_consecutive_values 
       <dbl>  <int>    
1    1100015 2 
2    1100016 3 
3    1100017 1 

可以使用 data.table 的 rleid 为连续相同的值编号，做法如下：

library(dplyr)
library(data.table)

# Longest run of identical, non-NA Valor values per ADM2_PCODE.
df %>%
  # Rows with a missing Valor are removed before runs are identified.
  filter(!is.na(Valor)) %>%
  group_by(ADM2_PCODE) %>%
  # rleid() gives every stretch of equal neighbouring values its own id,
  # restarting within each ADM2_PCODE group.
  mutate(run_id = rleid(Valor)) %>%
  # One row per run, holding the number of rows in that run.
  count(run_id, name = "run_length") %>%
  # Keep only the longest run for each ADM2_PCODE.
  summarise(max_consecutive_values = max(run_length))

#  ADM2_PCODE max_consecutive_values
#       <dbl>                  <int>
#1    1100015                      2
#2    1100016                      3
#3    1100017                      1