如何在不带引号的情况下使用存储在向量中的变量名称?
How to use variables names as stored in a vector without the quotation marks?
我想创建一个循环,把数据框中多个变量的 t 检验输出存储起来。但是,当我把这些变量名以带引号的字符串形式存入向量后,它们无法直接用于 t 检验,因为名称是带引号保存的。例如,R 在循环中把第一个变量当作 "variable_1",这会产生错误,因为 t 检验需要不带引号的变量名,例如 t.test(variable_1 ~ Gender)。有人知道如何去掉向量中变量名称的引号吗?
# Asker's original attempt: loop over column names stored as strings and
# collect the t-test figures for each one. This is the version that fails.
# Column names to test, stored as character strings.
variable <- c("variable_1", "variable_2", "variable_3")
# Zero-row result frame with one numeric column per reported figure.
df <- data.frame(t_value=as.numeric(),
df=as.numeric(),
p_value= as.numeric(),
mean_f= as.numeric(),
mean_m= as.numeric())
# Puts `data` on the search path; presumably so the formula below can find
# `Gender` without a `data =` argument.
attach(data)
for(v in variable){
# `v` holds a column name as a string, so this formula refers to that string
# rather than to the column it names; this is the call reported as failing.
output <- t.test(v ~ Gender)
# Keep elements 1, 2, 3 and 5 of the test result.
values <- output[c(1,2,3,5)]
# Flatten to an unnamed vector and round to 3 decimals.
row <- round(unlist(values, use.names = FALSE),3)
# Append the values to the result frame.
df <- rbind(df, row)
}
下面做了一些更改,使代码可以借助 get 运行。正如其他人指出的那样,在这种情况下使用 attach 是个糟糕的主意,所以我改用 mtcars 作为示例并省略了 attach。
我还做了其他几项更改,让代码尽可能完善。不过,你最好搜索一下关于对多个变量运行 t 检验的大量现有答案,或者直接采用 @starja 或 @r2evans 的答案。
# t-tests of several mtcars columns against `am`, collected into one summary
# data frame with one row per tested column.
variable <- c("mpg", "hp")

# Zero-row template that fixes the column names and types of the result.
# `df` stays equal to this template when `variable` is empty.
df <- data.frame(
  t_value = numeric(),
  df = numeric(),
  p_value = numeric(),
  mean_f = numeric(),
  mean_m = numeric()
)

# Collect one single-row data frame per variable in a preallocated list and
# bind them once at the end, instead of copying `df` on every iteration.
rows <- vector("list", length(variable))
for (i in seq_along(variable)) {
  v <- variable[[i]]
  # `get(v)` resolves the column whose name is stored in the string `v`.
  output <- t.test(get(v) ~ am, data = mtcars)
  # Elements 1, 2, 3 and 5 of the test result: statistic, parameter (df),
  # p.value and estimate (the two group means).
  values <- output[c(1, 2, 3, 5)]
  row <- round(unlist(values, use.names = FALSE), 3)
  rows[[i]] <- data.frame(
    t_value = row[[1]],
    df = row[[2]],
    p_value = row[[3]],
    mean_f = row[[4]], # mean of the first `am` group
    mean_m = row[[5]]  # mean of the second `am` group
  )
}
df <- do.call(rbind, c(list(df), rows))
df
#>   t_value     df p_value  mean_f  mean_m
#> 1  -3.767 18.332   0.001  17.147  24.392
#> 2   1.266 18.715   0.221 160.263 126.846
下面是一种更现代的方法,使用了非标准求值(non-standard evaluation)和 purrr。我把你的循环逻辑放进了一个函数,并对 variable 的每个条目调用该函数。在函数内部,v 的值(一个字符串)被转换为符号,也就是你的变量名。然后,该变量会在传给 t.test 的 data 参数的 data.frame 上下文中被求值。
library(purrr)

# Names of the columns to test, kept as character strings.
variable <- c("variable_1", "variable_2", "variable_3")

# Run a t-test of the column named by `v` (a string) against `Gender` within
# `input_data`, and return the rounded figures as a one-row data frame.
calc_fun <- function(v, input_data) {
  # The string is turned into a symbol and evaluated, so the formula picks up
  # the column of that name from `input_data`.
  test_result <- t.test(eval(rlang::sym(v)) ~ Gender, data = input_data)
  picked <- test_result[c(1, 2, 3, 5)]
  rounded <- round(unlist(picked, use.names = FALSE), 3)
  data.frame(
    t_values = rounded[1],
    df = rounded[2],
    p_value = rounded[3],
    mean_f = rounded[4],
    mean_m = rounded[5]
  )
}

# One result row per entry of `variable`, row-bound into a single data frame.
df <- map_dfr(variable, function(nm) calc_fun(v = nm, input_data = data))
使用@Chuck P 的示例,我的方法如下所示:
library(purrr)

# Columns of mtcars to test against `am`, kept as character strings.
variable <- c("mpg", "hp")

# Run a t-test of the column named by `v` (a string) against `am` within
# `input_data`, and return the rounded figures as a one-row data frame.
calc_fun <- function(v, input_data) {
  # The string is turned into a symbol and evaluated, so the formula picks up
  # the column of that name from `input_data`.
  output <- t.test(eval(rlang::sym(v)) ~ am, data = input_data)
  # Elements 1, 2, 3 and 5 of the test result: statistic, parameter (df),
  # p.value and estimate (the two group means).
  values <- output[c(1, 2, 3, 5)]
  values <- round(unlist(values, use.names = FALSE), 3)
  data.frame(
    t_values = values[1],
    df = values[2],
    p_value = values[3],
    mean_f = values[4],
    mean_m = values[5]
  )
}

# One result row per entry of `variable`, row-bound into a single data frame.
df <- map_dfr(variable, ~ calc_fun(v = .x, input_data = mtcars))
df
#>   t_values     df p_value  mean_f  mean_m
#> 1   -3.767 18.332   0.001  17.147  24.392
#> 2    1.266 18.715   0.221 160.263 126.846
如果您需要将一个变量与数据框中的所有(或部分)其他变量进行比较,可以这样做:
# Compare `mpg` with each of several other mtcars columns using a two-sample
# t-test, and stack the results with one row per compared column.
vars <- c("cyl", "disp", "hp", "gear")
do.call(
  rbind.data.frame,
  lapply(setNames(vars, vars), function(col) {
    res <- t.test(mtcars[["mpg"]], mtcars[[col]])
    # statistic, parameter and p.value, followed by the two estimated means
    c(res[1:3], res[[5]])
  })
)
#      statistic parameter      p.value mean.of.x mean.of.y
# cyl   12.51163  36.40239 9.507708e-15  20.09062    6.1875
# disp  -9.60236  31.14661 7.978234e-11  20.09062  230.7219
# hp   -10.40489  31.47905 1.030354e-11  20.09062  146.6875
# gear  15.28179  31.92893 3.077106e-16  20.09062    3.6875
如果你需要比较不同的对(而不是全部对一个),那么可能是
vars <- c("mpg", "cyl", "disp", "hp", "gear")
# Every ordered pair of variable names, minus a variable paired with itself.
eg <- expand.grid(vars, vars, stringsAsFactors = FALSE)
eg <- eg[eg[["Var1"]] != eg[["Var2"]], ]
head(eg)
#   Var1 Var2
# 2  cyl  mpg
# 3 disp  mpg
# 4   hp  mpg
# 5 gear  mpg
# 6  mpg  cyl
# 8 disp  cyl
# Run one two-sample t-test per pair, walking the two column selections in
# parallel, then stack the per-pair results into a single frame.
ret <- do.call(
  rbind.data.frame,
  mapply(
    function(x, y) {
      res <- t.test(x, y)
      # statistic, parameter and p.value, followed by the two estimated means
      c(res[1:3], res[[5]])
    },
    mtcars[eg[, 1]],
    mtcars[eg[, 2]],
    SIMPLIFY = FALSE
  )
)
# Put the pair labels in front of the test figures.
ret <- cbind(eg, ret)
head(ret)
#   Var1 Var2 statistic parameter      p.value mean.of.x mean.of.y
# 2  cyl  mpg -12.51163  36.40239 9.507708e-15   6.18750  20.09062
# 3 disp  mpg   9.60236  31.14661 7.978234e-11 230.72188  20.09062
# 4   hp  mpg  10.40489  31.47905 1.030354e-11 146.68750  20.09062
# 5 gear  mpg -15.28179  31.92893 3.077106e-16   3.68750  20.09062
# 6  mpg  cyl  12.51163  36.40239 9.507708e-15  20.09062   6.18750
# 8 disp  cyl  10.24721  31.01287 1.774454e-11 230.72188   6.18750
---
Note:
1. Iteratively building a frame row-by-row works fine logically and in small doses, but in the long run it performs very poorly: it makes a complete copy of the whole frame with each added row, which is memory-inefficient (and slow).
2. The use of `attach` is discouraged, as I said in my comment. Also, `get` should be avoided as well, though perhaps to a lesser degree than `attach`.
我想创建一个循环,把数据框中多个变量的 t 检验输出存储起来。但是,当我把这些变量名以带引号的字符串形式存入向量后,它们无法直接用于 t 检验,因为名称是带引号保存的。例如,R 在循环中把第一个变量当作 "variable_1",这会产生错误,因为 t 检验需要不带引号的变量名,例如 t.test(variable_1 ~ Gender)。有人知道如何去掉向量中变量名称的引号吗?
# Asker's original attempt: loop over column names stored as strings and
# collect the t-test figures for each one. This is the version that fails.
# Column names to test, stored as character strings.
variable <- c("variable_1", "variable_2", "variable_3")
# Zero-row result frame with one numeric column per reported figure.
df <- data.frame(t_value=as.numeric(),
df=as.numeric(),
p_value= as.numeric(),
mean_f= as.numeric(),
mean_m= as.numeric())
# Puts `data` on the search path; presumably so the formula below can find
# `Gender` without a `data =` argument.
attach(data)
for(v in variable){
# `v` holds a column name as a string, so this formula refers to that string
# rather than to the column it names; this is the call reported as failing.
output <- t.test(v ~ Gender)
# Keep elements 1, 2, 3 and 5 of the test result.
values <- output[c(1,2,3,5)]
# Flatten to an unnamed vector and round to 3 decimals.
row <- round(unlist(values, use.names = FALSE),3)
# Append the values to the result frame.
df <- rbind(df, row)
}
下面做了一些更改,使代码可以借助 get 运行。正如其他人指出的那样,在这种情况下使用 attach 是个糟糕的主意,所以我改用 mtcars 作为示例并省略了 attach。
我还做了其他几项更改,让代码尽可能完善。不过,你最好搜索一下关于对多个变量运行 t 检验的大量现有答案,或者直接采用 @starja 或 @r2evans 的答案。
# t-tests of several mtcars columns against `am`, collected into one summary
# data frame with one row per tested column.
variable <- c("mpg", "hp")

# Zero-row template that fixes the column names and types of the result.
# `df` stays equal to this template when `variable` is empty.
df <- data.frame(
  t_value = numeric(),
  df = numeric(),
  p_value = numeric(),
  mean_f = numeric(),
  mean_m = numeric()
)

# Collect one single-row data frame per variable in a preallocated list and
# bind them once at the end, instead of copying `df` on every iteration.
rows <- vector("list", length(variable))
for (i in seq_along(variable)) {
  v <- variable[[i]]
  # `get(v)` resolves the column whose name is stored in the string `v`.
  output <- t.test(get(v) ~ am, data = mtcars)
  # Elements 1, 2, 3 and 5 of the test result: statistic, parameter (df),
  # p.value and estimate (the two group means).
  values <- output[c(1, 2, 3, 5)]
  row <- round(unlist(values, use.names = FALSE), 3)
  rows[[i]] <- data.frame(
    t_value = row[[1]],
    df = row[[2]],
    p_value = row[[3]],
    mean_f = row[[4]], # mean of the first `am` group
    mean_m = row[[5]]  # mean of the second `am` group
  )
}
df <- do.call(rbind, c(list(df), rows))
df
#>   t_value     df p_value  mean_f  mean_m
#> 1  -3.767 18.332   0.001  17.147  24.392
#> 2   1.266 18.715   0.221 160.263 126.846
下面是一种更现代的方法,使用了非标准求值(non-standard evaluation)和 purrr。我把你的循环逻辑放进了一个函数,并对 variable 的每个条目调用该函数。在函数内部,v 的值(一个字符串)被转换为符号,也就是你的变量名。然后,该变量会在传给 t.test 的 data 参数的 data.frame 上下文中被求值。
library(purrr)

# Names of the columns to test, kept as character strings.
variable <- c("variable_1", "variable_2", "variable_3")

# Run a t-test of the column named by `v` (a string) against `Gender` within
# `input_data`, and return the rounded figures as a one-row data frame.
calc_fun <- function(v, input_data) {
  # The string is turned into a symbol and evaluated, so the formula picks up
  # the column of that name from `input_data`.
  test_result <- t.test(eval(rlang::sym(v)) ~ Gender, data = input_data)
  picked <- test_result[c(1, 2, 3, 5)]
  rounded <- round(unlist(picked, use.names = FALSE), 3)
  data.frame(
    t_values = rounded[1],
    df = rounded[2],
    p_value = rounded[3],
    mean_f = rounded[4],
    mean_m = rounded[5]
  )
}

# One result row per entry of `variable`, row-bound into a single data frame.
df <- map_dfr(variable, function(nm) calc_fun(v = nm, input_data = data))
使用@Chuck P 的示例,我的方法如下所示:
library(purrr)

# Columns of mtcars to test against `am`, kept as character strings.
variable <- c("mpg", "hp")

# Run a t-test of the column named by `v` (a string) against `am` within
# `input_data`, and return the rounded figures as a one-row data frame.
calc_fun <- function(v, input_data) {
  # The string is turned into a symbol and evaluated, so the formula picks up
  # the column of that name from `input_data`.
  output <- t.test(eval(rlang::sym(v)) ~ am, data = input_data)
  # Elements 1, 2, 3 and 5 of the test result: statistic, parameter (df),
  # p.value and estimate (the two group means).
  values <- output[c(1, 2, 3, 5)]
  values <- round(unlist(values, use.names = FALSE), 3)
  data.frame(
    t_values = values[1],
    df = values[2],
    p_value = values[3],
    mean_f = values[4],
    mean_m = values[5]
  )
}

# One result row per entry of `variable`, row-bound into a single data frame.
df <- map_dfr(variable, ~ calc_fun(v = .x, input_data = mtcars))
df
#>   t_values     df p_value  mean_f  mean_m
#> 1   -3.767 18.332   0.001  17.147  24.392
#> 2    1.266 18.715   0.221 160.263 126.846
如果您需要将一个变量与数据框中的所有(或部分)其他变量进行比较,可以这样做:
# Compare `mpg` with each of several other mtcars columns using a two-sample
# t-test, and stack the results with one row per compared column.
vars <- c("cyl", "disp", "hp", "gear")
do.call(
  rbind.data.frame,
  lapply(setNames(vars, vars), function(col) {
    res <- t.test(mtcars[["mpg"]], mtcars[[col]])
    # statistic, parameter and p.value, followed by the two estimated means
    c(res[1:3], res[[5]])
  })
)
#      statistic parameter      p.value mean.of.x mean.of.y
# cyl   12.51163  36.40239 9.507708e-15  20.09062    6.1875
# disp  -9.60236  31.14661 7.978234e-11  20.09062  230.7219
# hp   -10.40489  31.47905 1.030354e-11  20.09062  146.6875
# gear  15.28179  31.92893 3.077106e-16  20.09062    3.6875
如果你需要比较不同的对(而不是全部对一个),那么可能是
vars <- c("mpg", "cyl", "disp", "hp", "gear")
# Every ordered pair of variable names, minus a variable paired with itself.
eg <- expand.grid(vars, vars, stringsAsFactors = FALSE)
eg <- eg[eg[["Var1"]] != eg[["Var2"]], ]
head(eg)
#   Var1 Var2
# 2  cyl  mpg
# 3 disp  mpg
# 4   hp  mpg
# 5 gear  mpg
# 6  mpg  cyl
# 8 disp  cyl
# Run one two-sample t-test per pair, walking the two column selections in
# parallel, then stack the per-pair results into a single frame.
ret <- do.call(
  rbind.data.frame,
  mapply(
    function(x, y) {
      res <- t.test(x, y)
      # statistic, parameter and p.value, followed by the two estimated means
      c(res[1:3], res[[5]])
    },
    mtcars[eg[, 1]],
    mtcars[eg[, 2]],
    SIMPLIFY = FALSE
  )
)
# Put the pair labels in front of the test figures.
ret <- cbind(eg, ret)
head(ret)
#   Var1 Var2 statistic parameter      p.value mean.of.x mean.of.y
# 2  cyl  mpg -12.51163  36.40239 9.507708e-15   6.18750  20.09062
# 3 disp  mpg   9.60236  31.14661 7.978234e-11 230.72188  20.09062
# 4   hp  mpg  10.40489  31.47905 1.030354e-11 146.68750  20.09062
# 5 gear  mpg -15.28179  31.92893 3.077106e-16   3.68750  20.09062
# 6  mpg  cyl  12.51163  36.40239 9.507708e-15  20.09062   6.18750
# 8 disp  cyl  10.24721  31.01287 1.774454e-11 230.72188   6.18750
---
Note:
1. Iteratively building a frame row-by-row works fine logically and in small doses, but in the long run it performs very poorly: it makes a complete copy of the whole frame with each added row, which is memory-inefficient (and slow).
2. The use of `attach` is discouraged, as I said in my comment. Also, `get` should be avoided as well, though perhaps to a lesser degree than `attach`.