使用月份的几个最大值计算年度平均值

Calculating annual mean using few maximum values of months

我有一个包含 11 个变量的时间序列数据集 https://drive.google.com/file/d/1x63IfB429i3JKrheWNiKn0nIg28xyp9R/view?usp=sharing

我正在尝试计算变量 (v1:v11) 的年均值,我可以使用 r 中的 tidyr 和 dplyr 包来做到这一点,特别是使用 group_by 和 summarize 函数。

 library(tidyr)
    library(dplyr)

    # Reshape d1 to long format: one row per observation and variable
    # (v1..v11), then drop the rows that contain missing values.
    tidied_df <- d1 %>%
      pivot_longer(v1:v11, names_to = "VIs", values_to = "Value") %>%
      drop_na()

    # Annual mean and maximum of every variable for each plot.
    # The argument must be spelled `na.rm`: a misspelled name is silently
    # ignored by mean() and is treated by max() as one more value to compare.
    annual_growing_mn_mx <-
      tidied_df %>%
      group_by(Plot_code, VIs, Year) %>%
      summarise(
        VIs_mn = mean(Value, na.rm = TRUE),
        VIs_mx = max(Value, na.rm = TRUE),
        .groups = "drop_last"
      )

但是,现在我只想用一部分数据来计算这个平均值。例如,我想只用给定年份中数值最大的 3 个月来计算年度平均值,也就是说,年度平均值只取该年中数值最高的 3 个月的值。输出应该像这样:

一种简单的方法是通过子集化或过滤来生成新的数据框,其中只保留每年数值最大的 3 个月的数据。然后我可以再次对它进行汇总以获得年度平均值。

我尝试了不同的包和函数,但都没有成功。

非常感谢任何帮助!!!

可重现的例子:

structure(list(X = 1:105, Plot_code = c("AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu", 
"AT_Neu", "AT_Neu", "AT_Neu", "AT_Neu"), Year = c(2002L, 2002L, 
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 
2002L, 2002L, 2002L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2004L, 
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 
2004L, 2004L, 2004L, 2004L, 2005L, 2005L, 2005L, 2005L, 2005L, 
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 
2006L, 2006L, 2006L, 2006L), Month = c(1L, 1L, 10L, 11L, 11L, 
12L, 12L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 
8L, 9L, 9L, 1L, 1L, 10L, 11L, 11L, 12L, 12L, 2L, 2L, 3L, 3L, 
4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 1L, 1L, 10L, 
10L, 11L, 12L, 12L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 
7L, 8L, 8L, 9L, 9L, 1L, 1L, 10L, 11L, 11L, 12L, 12L, 2L, 2L, 
3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 1L, 1L, 
10L, 11L, 11L, 12L, 12L, 2L, 2L, 3L, 3L, 4L, 4L), v1 = c(NA, 
NA, 0.63, 0.62, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.82, 0.83, 
0.73, 0.76, 0.79, 0.8, 0.72, 0.76, 0.85, NA, NA, NA, 0.66, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.77, 0.67, NA, 0.74, 
0.7, 0.71, 0.79, 0.78, NA, NA, NA, 0.71, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 0.86, NA, 0.75, 0.75, NA, 0.86, NA, 0.75, 
0.79, 0.8, NA, NA, 0.71, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 0.77, NA, 0.69, 0.7, NA, 0.85, NA, 0.77, 0.82, 0.74, NA, 
NA, 0.7, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), v2 = c(NA, 
NA, 0.48, 0.43, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.66, 0.69, 
0.54, 0.58, 0.64, 0.64, 0.56, 0.56, 0.73, NA, NA, NA, 0.46, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.59, 0.5, NA, 0.53, 
0.5, 0.51, 0.57, 0.6, NA, NA, NA, 0.52, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 0.78, NA, 0.48, 0.62, NA, 0.68, NA, 0.56, 0.64, 
0.62, NA, NA, 0.49, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.61, 
NA, 0.5, 0.52, NA, 0.73, NA, 0.59, 0.65, 0.52, NA, NA, 0.49, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), v3 = c(NA, NA, 0.47, 
0.43, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.62, 0.64, 0.52, 0.56, 
0.61, 0.61, 0.54, 0.56, 0.66, NA, NA, NA, 0.47, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 0.57, 0.5, NA, 0.52, 0.49, 0.5, 0.54, 
0.57, NA, NA, NA, 0.52, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
0.69, NA, 0.48, 0.57, NA, 0.64, NA, 0.55, 0.59, 0.59, NA, NA, 
0.48, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.59, NA, 0.48, 
0.5, NA, 0.66, NA, 0.56, 0.61, 0.52, NA, NA, 0.48, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA), v4 = c(NA, NA, 4.45, 4.32, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 10.31, 10.69, 6.44, 7.27, 8.59, 9.05, 
6.08, 7.32, 12.73, NA, NA, NA, 4.95, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 7.81, 5.13, NA, 6.74, 5.7, 5.83, 8.68, 8.19, 
NA, NA, NA, 5.81, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13.61, 
NA, 7.04, 7.03, NA, 12.88, NA, 7.02, 8.48, 8.99, NA, NA, 5.99, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7.54, NA, 5.39, 5.62, 
NA, 12.09, NA, 7.81, 9.9, 6.79, NA, NA, 5.63, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA), v5 = c(NA, NA, 0.59, 0.55, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 0.74, 0.73, 0.56, 0.61, 0.72, 0.7, 0.58, 
0.72, 0.73, NA, NA, NA, 0.49, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 0.66, 0.55, NA, 0.67, 0.51, 0.53, 0.62, 0.67, NA, 
NA, NA, 0.63, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.73, NA, 
0.57, 0.59, NA, 0.77, NA, 0.65, 0.64, 0.66, NA, NA, 0.54, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.72, NA, 0.46, 0.54, NA, 
0.69, NA, 0.58, 0.74, 0.67, NA, NA, 0.55, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), v6 = c(NA, NA, 0.04, 0.04, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 0.02, 0.02, 0.03, 0.03, 0.02, 0.02, 0.03, 
0.02, 0.02, NA, NA, NA, 0.03, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 0.03, 0.03, NA, 0.02, 0.03, 0.03, 0.02, 0.03, NA, 
NA, NA, 0.03, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.03, NA, 
0.02, 0.04, NA, 0.02, NA, 0.02, 0.03, 0.02, NA, NA, 0.03, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.03, NA, 0.04, 0.03, NA, 
0.03, NA, 0.03, 0.02, 0.02, NA, NA, 0.03, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), v7 = c(NA, NA, 0.09, 0.08, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 0.05, 0.04, 0.06, 0.06, 0.06, 0.05, 0.07, 
0.06, 0.04, NA, NA, NA, 0.07, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 0.05, 0.08, NA, 0.06, 0.07, 0.07, 0.04, 0.05, NA, 
NA, NA, 0.07, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.04, NA, 
0.05, 0.06, NA, 0.04, NA, 0.06, 0.05, 0.05, NA, NA, 0.06, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.06, NA, 0.07, 0.07, NA, 
0.04, NA, 0.05, 0.05, 0.06, NA, NA, 0.06, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), v8 = c(NA, NA, 0.4, 0.35, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 0.46, 0.48, 0.39, 0.43, 0.47, 0.47, 0.43, 
0.42, 0.5, NA, NA, NA, 0.36, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 0.42, 0.41, NA, 0.38, 0.37, 0.38, 0.38, 0.42, NA, 
NA, NA, 0.41, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.53, NA, 
0.32, 0.45, NA, 0.46, NA, 0.42, 0.45, 0.43, NA, NA, 0.35, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.46, NA, 0.37, 0.39, NA, 
0.5, NA, 0.41, 0.45, 0.38, NA, NA, 0.36, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), v9 = c(NA, NA, 0.1, 0.1, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 0.07, 0.08, 0.11, 0.1, 0.08, 0.08, 0.11, 
0.07, 0.08, NA, NA, NA, 0.12, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 0.09, 0.12, NA, 0.08, 0.12, 0.12, 0.09, 0.08, NA, 
NA, NA, 0.09, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.08, NA, 
0.09, 0.12, NA, 0.06, NA, 0.09, 0.1, 0.09, NA, NA, 0.1, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 0.07, NA, 0.14, 0.12, NA, 0.09, 
NA, 0.11, 0.07, 0.08, NA, NA, 0.1, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), v10 = c(99980800.92, 99980800.92, 0.22, 0.189, 99980800.92, 
99980800.92, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 
99980800.92, 99980800.92, 99980800.92, 0.3404, 0.36, 0.2535, 
0.2924, 0.3337, 0.3384, 0.2752, 0.2856, 0.385, 99980800.92, 99980800.92, 
99980800.92, 0.2088, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 
99980800.92, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 
99980800.92, 99980800.92, 0.2898, 0.2419, 99980800.92, 0.2508, 
0.2294, 0.2394, 0.2698, 0.294, 99980800.92, 99980800.92, 99980800.92, 
0.2583, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 
99980800.92, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 
0.4134, 99980800.92, 0.2144, 0.3015, 99980800.92, 0.3588, 99980800.92, 
0.2814, 0.3195, 0.3096, 99980800.92, 99980800.92, 0.2205, 99980800.92, 
99980800.92, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 
99980800.92, 99980800.92, 99980800.92, 99980800.92, 0.3174, 99980800.92, 
0.2257, 0.2418, 99980800.92, 0.385, 99980800.92, 0.2829, 0.333, 
0.2508, 99980800.92, 99980800.92, 0.2232, 99980800.92, 99980800.92, 
99980800.92, 99980800.92, 99980800.92, 99980800.92, 99980800.92, 
99980800.92, 99980800.92, 99980800.92), v11 = c(0, 0, 0.788622527, 
0.66959026, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.953392233, 0.973284802, 
0.836679489, 0.908891095, 0.953392233, 0.961089831, 0.893697727, 
0.893697727, 0.982013057, 0, 0, 0, 0.732659321, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.908891095, 0.836679489, 0, 0.81366445, 0.761594156, 
0.788622527, 0.857664567, 0.908891095, 0, 0, 0, 0.857664567, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.990411723, 0, 0.66959026, 0.93414721, 
0, 0.961089831, 0, 0.893697727, 0.944455651, 0.92233483, 0, 0, 
0.732659321, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.944455651, 0, 0.761594156, 
0.81366445, 0, 0.982013057, 0, 0.893697727, 0.944455651, 0.81366445, 
0, 0, 0.761594156, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 
105L), class = "data.frame")

也许您只需使用 dplyr 中的 across,无需用 tidyr 重塑两次:
  • across 将同时对所有需要的列进行操作
  • order(., na.last = F) 返回按升序排列的索引,并将 NA 放在开头。所以
  • .[order(., na.last = F)] 将对向量进行排序
  • tail(.[order(., na.last = F)], 3) 将给出最后三个值(三个最大值)
  • 之后再对这三个值取 mean(na.rm = T 会忽略其中可能残留的 NA),即可得到所需的平均值。

希望这能澄清语法。

library(dplyr, warn.conflicts = FALSE)

# Forward slashes: in an R string "\U" starts a Unicode escape, so the
# backslash-separated Windows path does not parse.
d1 <- read.csv("C:/Users/Acer/Documents/d1.csv")

# For each plot and year, average the three largest values of every column
# whose name starts with "v".
d1 %>%
  group_by(Plot_code, Year) %>%
  summarise(
    across(
      starts_with("v"),
      # order() puts NAs first, tail() keeps the last three (largest) values,
      # and na.rm drops any NA left when a group has fewer than three values.
      ~ mean(tail(.[order(., na.last = FALSE)], 3), na.rm = TRUE)
    ),
    .groups = "drop"
  )
#> # A tibble: 44 x 13
#>    Plot_code  Year    v1    v2    v3    v4    v5     v6     v7    v8     v9
#>    <chr>     <int> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl>
#>  1 AT_Neu     2002 0.833 0.693 0.64  11.2  0.733 0.0367 0.08   0.483 0.107 
#>  2 AT_Neu     2003 0.78  0.587 0.56   8.23 0.667 0.03   0.0733 0.417 0.12  
#>  3 AT_Neu     2004 0.84  0.7   0.64  11.8  0.72  0.0333 0.0633 0.48  0.103 
#>  4 AT_Neu     2005 0.813 0.663 0.62   9.93 0.717 0.0333 0.0667 0.47  0.123 
#>  5 AT_Neu     2006 0.823 0.67  0.62  10.6  0.697 0.03   0.0633 0.463 0.103 
#>  6 AT_Neu     2007 0.763 0.573 0.553  7.61 0.663 0.03   0.0633 0.413 0.103 
#>  7 AT_Neu     2008 0.793 0.627 0.59   8.66 0.723 0.03   0.06   0.46  0.09  
#>  8 AT_Neu     2009 0.823 0.67  0.62  10.2  0.687 0.02   0.05   0.467 0.0833
#>  9 AT_Neu     2010 0.81  0.65  0.603  9.41 0.713 0.0267 0.0633 0.46  0.09  
#> 10 AT_Neu     2011 0.783 0.593 0.567  8.24 0.69  0.03   0.08   0.423 0.0967
#> # ... with 34 more rows, and 2 more variables: v10 <dbl>, v11 <dbl>

使用后来分享的 dput 数据运行上述代码的结果:

# A tibble: 5 x 13
  Plot_code  Year    v1    v2    v3    v4    v5     v6     v7    v8    v9       v10   v11
  <chr>     <int> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>     <dbl> <dbl>
1 AT_Neu     2002 0.833 0.693  0.64 11.2  0.733 0.0367 0.08   0.483 0.107 99980801. 0.972
2 AT_Neu     2003 0.78  0.587  0.56  8.23 0.667 0.03   0.0733 0.417 0.12  99980801. 0.892
3 AT_Neu     2004 0.84  0.7    0.64 11.8  0.72  0.0333 0.0633 0.48  0.103 99980801. 0.965
4 AT_Neu     2005 0.813 0.663  0.62  9.93 0.717 0.0333 0.0667 0.47  0.123 99980801. 0.957
5 AT_Neu     2006 0.7   0.49   0.48  5.63 0.55  0.03   0.06   0.36  0.1   99980801. 0.254