对 R 数据框中某个列表列的每一行应用 reduce
apply reduce to each row, containing a list, of a column in R dataframe
我已经包含了我的数据框的 20 行:
structure(list(countyfips = c(1003, 1003, 1003, 1003, 1003, 1005,
1005, 1005, 1005, 1005, 1007, 1007, 1007, 1007, 1007, 1009, 1009,
1009, 1009, 1009), engagement = c("-.186", "-.231", "-.0681",
"-.38", "-.267", "-.0148", ".00322", ".0804", "-.478", "-.83",
"-.0532", "-.162", "-.0185", "-.883", "-.909", ".0278", "-.537",
"-.691", "-.972", "-.981")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
我使用以下表达式对其进行了分组:
math_stu_online_engage %>%
group_by(countyfips) %>% summarise(monthly_engagement = list(engagement))
我现在想将以下 Reduce 函数应用于 monthly_engagement 列的每个列表/行:
mutate(acc_perc_change = Reduce(function(x, y) x + x * y, monthly_engagement))
但是我收到这个错误:
Error: Problem with `mutate()` input `acc_perc_change`.
x non-numeric argument to binary operator
我做错了什么?
非常感谢!
它是 character 列,所以我们需要先把它转换成 numeric。其次,reduce/Reduce 的输出长度为 5(每个列表含 5 个值),而汇总后的数据框只有 4 行(每个 countyfips 一行),因此我们可能需要将结果包装在 list 中。
library(dplyr)
library(purrr)
# Compute each county's accumulated percentage change and attach it to every
# row of that county.
# - `engagement` is stored as character, so it is converted with as.numeric()
#   before any arithmetic is attempted.
# - A grouped mutate(.keep = "none") keeps only the grouping column and the
#   two new columns. It replaces the multi-row summarise(), which
#   dplyr >= 1.1.0 deprecates with a warning. Rows stay in their input order.
# - reduce() folds one group's values into a single number
#   (acc + acc * rate at each step); mutate() recycles it over the group.
df1 %>%
  group_by(countyfips) %>%
  mutate(
    monthly_engagement = as.numeric(engagement),
    acc_perc_change = reduce(
      monthly_engagement,
      function(acc, rate) acc + acc * rate
    ),
    .keep = "none"
  ) %>%
  ungroup()
输出:
# A tibble: 20 x 3
countyfips monthly_engagement acc_perc_change
<dbl> <dbl> <dbl>
1 1003 -0.186 -0.0606
2 1003 -0.231 -0.0606
3 1003 -0.0681 -0.0606
4 1003 -0.38 -0.0606
5 1003 -0.267 -0.0606
6 1005 -0.0148 -0.00142
7 1005 0.00322 -0.00142
8 1005 0.0804 -0.00142
9 1005 -0.478 -0.00142
10 1005 -0.83 -0.00142
11 1007 -0.0532 -0.000466
12 1007 -0.162 -0.000466
13 1007 -0.0185 -0.000466
14 1007 -0.883 -0.000466
15 1007 -0.909 -0.000466
16 1009 0.0278 0.00000212
17 1009 -0.537 0.00000212
18 1009 -0.691 0.00000212
19 1009 -0.972 0.00000212
20 1009 -0.981 0.00000212
这是 base R 中的解决方案:
# Base R version: handle one county at a time, then stack the pieces.
do.call(rbind, lapply(unique(df$countyfips), function(fips) {
  # Rows belonging to the current county only.
  county_rows <- subset(df, countyfips == fips)
  # `engagement` is stored as character; make it numeric before the fold.
  county_rows$engagement <- as.numeric(county_rows$engagement)
  # Fold the county's values into one number; assigning that scalar to a
  # column recycles it across every row of the county.
  county_rows$acc_perc_change <- Reduce(
    function(acc, rate) acc + acc * rate,
    county_rows$engagement
  )
  county_rows
}))
这是一个简化的 tidyverse 解决方案:
library(purrr)
# Condensed tidyverse version: convert `engagement` to numeric and compute the
# per-county accumulated change in one grouped mutate().
# - mutate(.keep = "none") keeps only the grouping column plus the columns
#   assigned here. It replaces the multi-row summarise(), which
#   dplyr >= 1.1.0 deprecates with a warning. Rows stay in their input order.
# - `acc_perc_change` sees the already-converted `engagement`, because
#   mutate() evaluates its arguments in order.
# - ungroup() returns a plain tibble so later verbs are not silently grouped.
df %>%
  group_by(countyfips) %>%
  mutate(
    engagement = as.numeric(engagement),
    acc_perc_change = reduce(engagement, function(acc, rate) acc + acc * rate),
    .keep = "none"
  ) %>%
  ungroup()
我已经包含了我的数据框的 20 行:
structure(list(countyfips = c(1003, 1003, 1003, 1003, 1003, 1005,
1005, 1005, 1005, 1005, 1007, 1007, 1007, 1007, 1007, 1009, 1009,
1009, 1009, 1009), engagement = c("-.186", "-.231", "-.0681",
"-.38", "-.267", "-.0148", ".00322", ".0804", "-.478", "-.83",
"-.0532", "-.162", "-.0185", "-.883", "-.909", ".0278", "-.537",
"-.691", "-.972", "-.981")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
我使用以下表达式对其进行了分组:
math_stu_online_engage %>%
group_by(countyfips) %>% summarise(monthly_engagement = list(engagement))
我现在想将以下 Reduce 函数应用于 monthly_engagement 列的每个列表/行:
mutate(acc_perc_change = Reduce(function(x, y) x + x * y, monthly_engagement))
但是我收到这个错误:
Error: Problem with `mutate()` input `acc_perc_change`.
x non-numeric argument to binary operator
我做错了什么?
非常感谢!
它是 character 列,所以我们需要先把它转换成 numeric。其次,reduce/Reduce 的输出长度为 5(每个列表含 5 个值),而汇总后的数据框只有 4 行(每个 countyfips 一行),因此我们可能需要将结果包装在 list 中。
library(dplyr)
library(purrr)
# Compute each county's accumulated percentage change and attach it to every
# row of that county.
# - `engagement` is stored as character, so it is converted with as.numeric()
#   before any arithmetic is attempted.
# - A grouped mutate(.keep = "none") keeps only the grouping column and the
#   two new columns. It replaces the multi-row summarise(), which
#   dplyr >= 1.1.0 deprecates with a warning. Rows stay in their input order.
# - reduce() folds one group's values into a single number
#   (acc + acc * rate at each step); mutate() recycles it over the group.
df1 %>%
  group_by(countyfips) %>%
  mutate(
    monthly_engagement = as.numeric(engagement),
    acc_perc_change = reduce(
      monthly_engagement,
      function(acc, rate) acc + acc * rate
    ),
    .keep = "none"
  ) %>%
  ungroup()
输出:
# A tibble: 20 x 3
countyfips monthly_engagement acc_perc_change
<dbl> <dbl> <dbl>
1 1003 -0.186 -0.0606
2 1003 -0.231 -0.0606
3 1003 -0.0681 -0.0606
4 1003 -0.38 -0.0606
5 1003 -0.267 -0.0606
6 1005 -0.0148 -0.00142
7 1005 0.00322 -0.00142
8 1005 0.0804 -0.00142
9 1005 -0.478 -0.00142
10 1005 -0.83 -0.00142
11 1007 -0.0532 -0.000466
12 1007 -0.162 -0.000466
13 1007 -0.0185 -0.000466
14 1007 -0.883 -0.000466
15 1007 -0.909 -0.000466
16 1009 0.0278 0.00000212
17 1009 -0.537 0.00000212
18 1009 -0.691 0.00000212
19 1009 -0.972 0.00000212
20 1009 -0.981 0.00000212
这是 base R 中的解决方案:
# Base R version: handle one county at a time, then stack the pieces.
do.call(rbind, lapply(unique(df$countyfips), function(fips) {
  # Rows belonging to the current county only.
  county_rows <- subset(df, countyfips == fips)
  # `engagement` is stored as character; make it numeric before the fold.
  county_rows$engagement <- as.numeric(county_rows$engagement)
  # Fold the county's values into one number; assigning that scalar to a
  # column recycles it across every row of the county.
  county_rows$acc_perc_change <- Reduce(
    function(acc, rate) acc + acc * rate,
    county_rows$engagement
  )
  county_rows
}))
这是一个简化的 tidyverse 解决方案:
library(purrr)
# Condensed tidyverse version: convert `engagement` to numeric and compute the
# per-county accumulated change in one grouped mutate().
# - mutate(.keep = "none") keeps only the grouping column plus the columns
#   assigned here. It replaces the multi-row summarise(), which
#   dplyr >= 1.1.0 deprecates with a warning. Rows stay in their input order.
# - `acc_perc_change` sees the already-converted `engagement`, because
#   mutate() evaluates its arguments in order.
# - ungroup() returns a plain tibble so later verbs are not silently grouped.
df %>%
  group_by(countyfips) %>%
  mutate(
    engagement = as.numeric(engagement),
    acc_perc_change = reduce(engagement, function(acc, rate) acc + acc * rate),
    .keep = "none"
  ) %>%
  ungroup()