根据多个时间段计算列的平均值

Question

我需要帮助来弄清楚如何每 ___ 小时计算一个变量的平均值。我想每 1/2 小时计算一次平均值，然后每 1、2、4 和 6 小时计算一次平均值。

这是我的数据集：

dput(head(R3L12, 10))

structure(list(Date = c("2015-05-23", "2015-05-23", "2015-05-23", 
"2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23", 
"2015-05-23", "2015-05-23"), Time = c("07:25:00", "07:40:00", 
"07:45:00", "09:10:00", "11:45:00", "11:55:00", "12:05:00", "12:35:00", 
"12:45:00", "13:30:00"), Turtle = structure(c(3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("R3L1", "R3L11", "R3L12", 
"R3L2", "R3L4", "R3L8", "R3L9", "R4L8", "R8L1", "R8L4", "R8NAT123"
), class = "factor"), Tex = c(11.891, 12.008, 12.055, 13.219, 
18.727, 18.992, 19.477, 20.367, 20.641, 28.305), m.Tb = c(12.477, 
12.54, 12.54, 12.978, 16.362, 16.612, 17.238, 19.617, 19.993, 
24.371), m.HR = c(7.56457, 6.66759, 17.51107, 9.72277, 19.44553, 
13.07674, 28.115, 14.99467, 17.16947, 40.40479), season = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("beginning", 
"end", "middle"), class = "factor"), year = c(2015L, 2015L, 2015L, 
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L), Mass = c(360L, 
360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L)), row.names = c(NA, 
10L), class = "data.frame")

我希望能够计算每个日期每个时间段的平均值 m.Tb。例如，对于 2015-05-23，我希望每 30 分钟、1 小时、2 小时、4 小时和 6 小时的平均值为 m.Tb。然后我想在第二天重复一遍。有时“时间”列中有“缺失”行，这是因为 NA 行已被删除。

如果您需要说明或有任何疑问，请告诉我，因为我还是 r 的新手。

Answer 1

希望这就是您要找的。由于生成的数据帧具有不同的行号，我不得不将它们存储在列表中。为此，我首先创建了一个包含所有你想要计算平均值的时间跨度的字符向量，然后我使用 purrr 包中的 map 函数来迭代它们，替换 breaks 参数在 cut 中创建您想要的时间跨度。

library(dplyr)
library(lubridate)
library(purrr)

breaks <- c("15 min", "30 min", "1 hour", "2 hour", "4 hour", "6 hour")

breaks %>%
  map(~ df %>% 
            unite("Date-Time", c("Date", "Time"), sep = " ", remove = FALSE) %>% 
            mutate(`Date-Time` = ymd_hms(`Date-Time`)) %>%
            mutate(DS = cut(`Date-Time`, breaks = .x)) %>%
            group_by(ymd(Date), DS) %>%
            summarise(avg = mean(m.Tb))) %>%
  set_names(breaks)


$`15 min`
# A tibble: 8 x 3
# Groups:   ymd(Date) [1]
  `ymd(Date)` DS                    avg
  <date>      <fct>               <dbl>
1 2015-05-23  2015-05-23 07:25:00  12.5
2 2015-05-23  2015-05-23 07:40:00  12.5
3 2015-05-23  2015-05-23 09:10:00  13.0
4 2015-05-23  2015-05-23 11:40:00  16.4
5 2015-05-23  2015-05-23 11:55:00  16.9
6 2015-05-23  2015-05-23 12:25:00  19.6
7 2015-05-23  2015-05-23 12:40:00  20.0
8 2015-05-23  2015-05-23 13:25:00  24.4

$`30 min`
# A tibble: 6 x 3
# Groups:   ymd(Date) [1]
  `ymd(Date)` DS                    avg
  <date>      <fct>               <dbl>
1 2015-05-23  2015-05-23 07:25:00  12.5
2 2015-05-23  2015-05-23 08:55:00  13.0
3 2015-05-23  2015-05-23 11:25:00  16.4
4 2015-05-23  2015-05-23 11:55:00  16.9
5 2015-05-23  2015-05-23 12:25:00  19.8
6 2015-05-23  2015-05-23 13:25:00  24.4

$`1 hour`
# A tibble: 5 x 3
# Groups:   ymd(Date) [1]
  `ymd(Date)` DS                    avg
  <date>      <fct>               <dbl>
1 2015-05-23  2015-05-23 07:00:00  12.5
2 2015-05-23  2015-05-23 09:00:00  13.0
3 2015-05-23  2015-05-23 11:00:00  16.5
4 2015-05-23  2015-05-23 12:00:00  18.9
5 2015-05-23  2015-05-23 13:00:00  24.4

$`2 hour`
# A tibble: 4 x 3
# Groups:   ymd(Date) [1]
  `ymd(Date)` DS                    avg
  <date>      <fct>               <dbl>
1 2015-05-23  2015-05-23 07:00:00  12.5
2 2015-05-23  2015-05-23 09:00:00  13.0
3 2015-05-23  2015-05-23 11:00:00  18.0
4 2015-05-23  2015-05-23 13:00:00  24.4

$`4 hour`
# A tibble: 2 x 3
# Groups:   ymd(Date) [1]
  `ymd(Date)` DS                    avg
  <date>      <fct>               <dbl>
1 2015-05-23  2015-05-23 07:00:00  12.6
2 2015-05-23  2015-05-23 11:00:00  19.0

$`6 hour`
# A tibble: 2 x 3
# Groups:   ymd(Date) [1]
  `ymd(Date)` DS                    avg
  <date>      <fct>               <dbl>
1 2015-05-23  2015-05-23 07:00:00  15.6
2 2015-05-23  2015-05-23 13:00:00  24.4

Answer 2

我们可以使用 lubridate

中的 ceiling_date

library(lubridate)
library(dplyr)
library(stringr)
R3L12 %>% 
   group_by(DS = ceiling_date(as.POSIXct(str_c(Date, Time, sep=" ")), 
         unit = '30 min' )) %>% 
   summarise(avg_30 = mean(m.Tb)) %>% 
   mutate(date = as.Date(DS))

-输出

# A tibble: 7 x 3
#  DS                  avg_30 date      
#  <dttm>               <dbl> <date>    
#1 2015-05-23 07:30:00   12.5 2015-05-23
#2 2015-05-23 08:00:00   12.5 2015-05-23
#3 2015-05-23 09:30:00   13.0 2015-05-23
#4 2015-05-23 12:00:00   16.5 2015-05-23
#5 2015-05-23 12:30:00   17.2 2015-05-23
#6 2015-05-23 13:00:00   19.8 2015-05-23
#7 2015-05-23 13:30:00   24.4 2015-05-23

Answer 3

这就是我要做的，你有很多缺失的时期，所以它不是半小时聚合的最佳输出

data_example <- structure(list(Date = c("2015-05-23", "2015-05-23", "2015-05-23", 
                        "2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23", 
                        "2015-05-23", "2015-05-23"), Time = c("07:25:00", "07:40:00", 
                                                              "07:45:00", "09:10:00", "11:45:00", "11:55:00", "12:05:00", "12:35:00", 
                                                              "12:45:00", "13:30:00"), Turtle = structure(c(3L, 3L, 3L, 3L, 
                                                                                                            3L, 3L, 3L, 3L, 3L, 3L), .Label = c("R3L1", "R3L11", "R3L12", 
                                                                                                                                                "R3L2", "R3L4", "R3L8", "R3L9", "R4L8", "R8L1", "R8L4", "R8NAT123"
                                                                                                            ), class = "factor"), Tex = c(11.891, 12.008, 12.055, 13.219, 
                                                                                                                                          18.727, 18.992, 19.477, 20.367, 20.641, 28.305), m.Tb = c(12.477, 
                                                                                                                                                                                                    12.54, 12.54, 12.978, 16.362, 16.612, 17.238, 19.617, 19.993, 
                                                                                                                                                                                                    24.371), m.HR = c(7.56457, 6.66759, 17.51107, 9.72277, 19.44553, 
                                                                                                                                                                                                                      13.07674, 28.115, 14.99467, 17.16947, 40.40479), season = structure(c(1L, 
                                                                                                                                                                                                                                                                                            1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("beginning", 
                                                                                                                                                                                                                                                                                                                                            "end", "middle"), class = "factor"), year = c(2015L, 2015L, 2015L, 
                                                                                                                                                                                                                                                                                                                                                                                          2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L), Mass = c(360L, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                     360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L)), row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           10L), class = "data.frame")

library(tidyverse)

floor_30 <- function(x) clock::date_floor(x = x,precision = "minute",n = 30)


mean_at_inteval <- function(data,date_col,interval_func) {
  data |> 
  group_by(interval = {{date_col}} |> interval_func()) |> 
  summarise(sum_interval = sum(m.Tb)) |>
  summarise(mean_interval = mean(sum_interval))
}

nest_example_data <- data_example %>%
  mutate(date_timer = str_c(Date,Time) %>% clock::date_time_parse(zone = "UTC")) |> 
  nest_by(Date)

final_data <- nest_example_data |> mutate(floor_30 = data |> mean_at_inteval(date_col = date_timer,interval_func = floor_30))

final_data
#> # A tibble: 1 x 3
#> # Rowwise:  Date
#>   Date                     data floor_30$mean_interval
#>   <chr>      <list<tibble[,9]>>                  <dbl>
#> 1 2015-05-23           [10 x 9]                   23.5

^{由 reprex package (v2.0.0)}

于 2021-05-30 创建

根据多个时间段计算列的平均值

Calculating average of a column based on multiple time periods

r

lubridate

dplyr