使用 dplyr 折叠因子和改变行

Collapsing factors and mutating rows with dplyr

鉴于:

# Example input: ten numbered days, each with four numeric measurements.
x <- data.frame(
  Day  = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  var1 = c(5, 4, 2, 3, 4, 5, 1, 2, 3, 4),
  var2 = c(3, 6, 2, 3, 4, 5, 7, 8, 1, 2),
  var3 = c(1, 2, 3, 4, 6, 2, 4, 7, 8, 4),
  var4 = c(1, 3, 7, 5, 3, 7, 2, 3, 1, 2)
)

`Day` 变量目前是数字,其中 1=星期一,5=星期五,6=星期一,10=星期五(即每 5 天循环一次)。我想把对应同一个星期几的行合并在一起,并按星期几对各变量取平均值,期望的结果如下:

# Desired result: one row per weekday, holding the per-weekday mean of each
# var column.
z <- data.frame(
  Day  = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday"),
  var1 = c(5, 2.5, 2, 3, 4),
  var2 = c(4, 6.5, 5, 2, 3),
  var3 = c(1.5, 3, 5, 6, 5),
  var4 = c(4, 2.5, 5, 3, 2.5)
)

如果数据已按 Day 排好序,可以通过重复星期名称向量来创建分组变量,然后用 summarise_at 计算各 'var' 列的均值(mean):

library(dplyr)

# Weekday names in calendar order. They double as the factor levels so the
# summary rows are listed Monday-first instead of alphabetically.
v1 <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")

# Assumes the rows of `x` are already in Day order: repeating the five names
# twice then labels rows 1-10 with their weekday. Group on that label and
# average every column whose name matches "var".
x %>%
  group_by(Day = factor(rep(v1, times = 2), levels = v1)) %>%
  summarise_at(vars(matches("var")), mean)
# Printed result as originally posted.
# NOTE(review): Day is a factor in this pipeline, so tibble would presumably
# report its type as a factor (<fct>) rather than the <chr> shown below --
# TODO confirm against the installed tibble version.
# A tibble: 5 x 5
#     Day  var1  var2  var3  var4
#       <chr> <dbl> <dbl> <dbl> <dbl>
# 1    Monday   5.0   4.0   1.5   4.0
# 2   Tuesday   2.5   6.5   3.0   2.5
# 3 Wednesday   2.0   5.0   5.0   5.0
# 4  Thursday   3.0   2.0   6.0   3.0
# 5    Friday   4.0   3.0   5.0   2.5

如果数据是无序的,则先创建一个 key/value 对照表(Day 编号 → 星期名称),将其与原始数据集连接,再按 'Day' 分组求均值(mean),即可得到与上面相同的结果:

# Key/value lookup table mapping each numeric Day (1-10) to its weekday
# name. The five names are recycled by data.frame() across the ten rows,
# so Day 6-10 map to Monday-Friday again.
x1 <- data.frame(
  Day = 1:10,
  DayC = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday"),
  stringsAsFactors = FALSE
)

# Attach the weekday name to every row by its Day value (row order does not
# matter here), average each 'var' column per weekday, then sort the rows
# into calendar order using `v1`, the weekday vector defined earlier.
x %>%
  # Name the join key explicitly: this avoids the "Joining, by = ..." message
  # and guards against silently joining on any other shared column.
  left_join(x1, by = "Day") %>%
  group_by(Day = DayC) %>%
  summarise_at(vars(matches("var")), mean) %>%
  arrange(factor(Day, levels = v1))

使用取模运算符 %%:

# Map each Day number onto a weekday with modulo arithmetic:
# (Day - 1) %% 5 + 1 turns 1..10 into 1..5, 1..5, which indexes `days`.
days <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
x %>%
  group_by(Day = days[(Day - 1) %% 5 + 1]) %>%
  summarise_all(mean) %>%
  # Day is a character column, so the grouped summary comes back sorted
  # alphabetically; reorder the rows to follow the weekday order in `days`.
  arrange(match(Day, days))

# A tibble: 5 x 5
#        Day  var1  var2  var3  var4
#      <chr> <dbl> <dbl> <dbl> <dbl>
#1    Monday   5.0   4.0   1.5   4.0
#2   Tuesday   2.5   6.5   3.0   2.5
#3 Wednesday   2.0   5.0   5.0   5.0
#4  Thursday   3.0   2.0   6.0   3.0
#5    Friday   4.0   3.0   5.0   2.5