R:在数据框的每一行的各个列上应用函数的整洁方法是什么?
R: Which is the tidy way to apply a function over various columns of each row of a data frame?
我想将一个函数应用于数据框的每一行,其中各列分别作为该函数的不同输入(不是像 `mean` 那样对各列做汇总,而是作为各自独立的参数)。
我想知道执行以下操作的整洁方法是什么:
# Data: observed successes, number of trials, and the hypothesised success
# probability for each row (the raw figures are scaled by 0.01).
successes <- c(0, 3, 6, 15, 15, 17, 12, 9, 22, 33)
trials <- c(50, 1788, 1876, 3345, 1223, 856, 342, 214, 265, 257)
prognosis <- 0.01 * c(0.05, 0.10, 0.25, 0.5, 0.75, 1.3, 2, 3.4, 6, 10)

# The p-value columns are created up front as numeric NA placeholders so the
# loop below only fills existing cells and the column type never changes.
test_data <- data.frame(
  successes = successes, trials = trials, prognosis = prognosis,
  p_value1 = NA_real_, p_value2 = NA_real_
)

# For every row, run a one-sided exact binomial test in each direction and
# store its p-value: p_value1 uses alternative "less", p_value2 "greater".
# seq_len() is used instead of 1:nrow() so that the body is skipped entirely
# (rather than executed for i = 1 and i = 0) when the data frame has no rows.
for (i in seq_len(nrow(test_data))) {
  test_data$p_value1[i] <- binom.test(
    test_data$successes[i], test_data$trials[i],
    p = test_data$prognosis[i], alternative = "less"
  )$p.value
  test_data$p_value2[i] <- binom.test(
    test_data$successes[i], test_data$trials[i],
    p = test_data$prognosis[i], alternative = "greater"
  )$p.value
}
一种可能的方法是:
# Input vectors: one element per row of the data frame built below.
successes <- c(0, 3, 6, 15, 15, 17, 12, 9, 22, 33)
trials <- c(50, 1788, 1876, 3345, 1223, 856, 342, 214, 265, 257)
prognosis <- 0.01 * c(0.05, 0.10, 0.25, 0.5, 0.75, 1.3, 2, 3.4, 6, 10)

# Unnamed arguments take their column names from the variable names; the two
# p-value columns start out as NA placeholders to be filled in later.
test_data <- data.frame(
  successes,
  trials,
  prognosis,
  p_value1 = NA,
  p_value2 = NA
)
library(dplyr)
# rowwise() makes mutate() evaluate binom.test() once per row with that row's
# values; ungroup() removes the row-wise grouping again afterwards.
test_data %>%
  rowwise() %>%
  mutate(
    p_value1 = binom.test(
      x = successes, n = trials, p = prognosis, alternative = "less"
    )$p.value,
    p_value2 = binom.test(
      x = successes, n = trials, p = prognosis, alternative = "greater"
    )$p.value
  ) %>%
  ungroup()
# # A tibble: 10 x 5
# successes trials prognosis p_value1 p_value2
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0. 50. 0.000500 0.975 1.00
# 2 3. 1788. 0.00100 0.893 0.266
# 3 6. 1876. 0.00250 0.806 0.330
# 4 15. 3345. 0.00500 0.396 0.697
# 5 15. 1223. 0.00750 0.975 0.0467
# 6 17. 856. 0.0130 0.966 0.0595
# 7 12. 342. 0.0200 0.978 0.0447
# 8 9. 214. 0.0340 0.805 0.306
# 9 22. 265. 0.0600 0.950 0.0786
# 10 33. 257. 0.100 0.943 0.0822
或者不使用 `rowwise`,而是使用向量化函数:
# Build the scalar helper and vectorise it in one step.
# The inner function returns the p-value of an exact binomial test for `s`
# successes in `t` trials against the hypothesised probability `p`, with `alt`
# naming the alternative hypothesis. Vectorize() lets callers pass whole
# vectors; the test is then run once per element.
GetPvalue <- Vectorize(function(s, t, p, alt) {
  fit <- binom.test(x = s, n = t, p = p, alternative = alt)
  fit$p.value
})
# Add both one-sided p-values as columns. GetPvalue is already vectorised, so
# whole columns are passed in and no rowwise() step is needed.
test_data %>%
  mutate(
    p_value1 = GetPvalue(successes, trials, prognosis, alt = "less"),
    p_value2 = GetPvalue(successes, trials, prognosis, alt = "greater")
  )
# successes trials prognosis p_value1 p_value2
# 1 0 50 0.0005 0.9753038 1.00000000
# 2 3 1788 0.0010 0.8933086 0.26613930
# 3 6 1876 0.0025 0.8061877 0.32975624
# 4 15 3345 0.0050 0.3963610 0.69722243
# 5 15 1223 0.0075 0.9748903 0.04667939
# 6 17 856 0.0130 0.9656352 0.05952219
# 7 12 342 0.0200 0.9781863 0.04473155
# 8 9 214 0.0340 0.8047247 0.30581962
# 9 22 265 0.0600 0.9503332 0.07855963
# 10 33 257 0.1000 0.9433326 0.08219425
我想将一个函数应用于数据框的每一行,其中各列分别作为该函数的不同输入(不是像 `mean` 那样对各列做汇总,而是作为各自独立的参数)。
我想知道执行以下操作的整洁方法是什么:
# Data: observed successes, number of trials, and the hypothesised success
# probability for each row (the raw figures are scaled by 0.01).
successes <- c(0, 3, 6, 15, 15, 17, 12, 9, 22, 33)
trials <- c(50, 1788, 1876, 3345, 1223, 856, 342, 214, 265, 257)
prognosis <- 0.01 * c(0.05, 0.10, 0.25, 0.5, 0.75, 1.3, 2, 3.4, 6, 10)

# The p-value columns are created up front as numeric NA placeholders so the
# loop below only fills existing cells and the column type never changes.
test_data <- data.frame(
  successes = successes, trials = trials, prognosis = prognosis,
  p_value1 = NA_real_, p_value2 = NA_real_
)

# For every row, run a one-sided exact binomial test in each direction and
# store its p-value: p_value1 uses alternative "less", p_value2 "greater".
# seq_len() is used instead of 1:nrow() so that the body is skipped entirely
# (rather than executed for i = 1 and i = 0) when the data frame has no rows.
for (i in seq_len(nrow(test_data))) {
  test_data$p_value1[i] <- binom.test(
    test_data$successes[i], test_data$trials[i],
    p = test_data$prognosis[i], alternative = "less"
  )$p.value
  test_data$p_value2[i] <- binom.test(
    test_data$successes[i], test_data$trials[i],
    p = test_data$prognosis[i], alternative = "greater"
  )$p.value
}
一种可能的方法是:
# Input vectors: one element per row of the data frame built below.
successes <- c(0, 3, 6, 15, 15, 17, 12, 9, 22, 33)
trials <- c(50, 1788, 1876, 3345, 1223, 856, 342, 214, 265, 257)
prognosis <- 0.01 * c(0.05, 0.10, 0.25, 0.5, 0.75, 1.3, 2, 3.4, 6, 10)

# Unnamed arguments take their column names from the variable names; the two
# p-value columns start out as NA placeholders to be filled in later.
test_data <- data.frame(
  successes,
  trials,
  prognosis,
  p_value1 = NA,
  p_value2 = NA
)
library(dplyr)
# rowwise() makes mutate() evaluate binom.test() once per row with that row's
# values; ungroup() removes the row-wise grouping again afterwards.
test_data %>%
  rowwise() %>%
  mutate(
    p_value1 = binom.test(
      x = successes, n = trials, p = prognosis, alternative = "less"
    )$p.value,
    p_value2 = binom.test(
      x = successes, n = trials, p = prognosis, alternative = "greater"
    )$p.value
  ) %>%
  ungroup()
# # A tibble: 10 x 5
# successes trials prognosis p_value1 p_value2
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0. 50. 0.000500 0.975 1.00
# 2 3. 1788. 0.00100 0.893 0.266
# 3 6. 1876. 0.00250 0.806 0.330
# 4 15. 3345. 0.00500 0.396 0.697
# 5 15. 1223. 0.00750 0.975 0.0467
# 6 17. 856. 0.0130 0.966 0.0595
# 7 12. 342. 0.0200 0.978 0.0447
# 8 9. 214. 0.0340 0.805 0.306
# 9 22. 265. 0.0600 0.950 0.0786
# 10 33. 257. 0.100 0.943 0.0822
或者不使用 `rowwise`,而是使用向量化函数:
# Build the scalar helper and vectorise it in one step.
# The inner function returns the p-value of an exact binomial test for `s`
# successes in `t` trials against the hypothesised probability `p`, with `alt`
# naming the alternative hypothesis. Vectorize() lets callers pass whole
# vectors; the test is then run once per element.
GetPvalue <- Vectorize(function(s, t, p, alt) {
  fit <- binom.test(x = s, n = t, p = p, alternative = alt)
  fit$p.value
})
# Add both one-sided p-values as columns. GetPvalue is already vectorised, so
# whole columns are passed in and no rowwise() step is needed.
test_data %>%
  mutate(
    p_value1 = GetPvalue(successes, trials, prognosis, alt = "less"),
    p_value2 = GetPvalue(successes, trials, prognosis, alt = "greater")
  )
# successes trials prognosis p_value1 p_value2
# 1 0 50 0.0005 0.9753038 1.00000000
# 2 3 1788 0.0010 0.8933086 0.26613930
# 3 6 1876 0.0025 0.8061877 0.32975624
# 4 15 3345 0.0050 0.3963610 0.69722243
# 5 15 1223 0.0075 0.9748903 0.04667939
# 6 17 856 0.0130 0.9656352 0.05952219
# 7 12 342 0.0200 0.9781863 0.04473155
# 8 9 214 0.0340 0.8047247 0.30581962
# 9 22 265 0.0600 0.9503332 0.07855963
# 10 33 257 0.1000 0.9433326 0.08219425