对 data.frame 执行 t 检验
perform ttest on a data.frame
我想对一个 data.frame 执行 t 检验(并获得 p.value):其中一列是分组(好与坏),其余各列都是数值。
我在这里生成了一个玩具数据集:
# Toy data set: 50 rows with a two-level group label W that alternates
# "a", "b", ... and three normally distributed measurement columns.
W <- rep(c("a", "b"), times = 25)
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
test_data <- data.frame(W = W, X = X, Y = Y, Z = Z)
然后我将数据转换成长格式:
# Reshape to long format: one row per (W, variable, value) triple.
# melt() is not base R, so it is namespace-qualified here; id.vars is given
# explicitly instead of letting melt() guess the id column and emit a message.
melt_testdata <- reshape2::melt(test_data, id.vars = "W")
并执行了t.test
# For each level of `variable`, split the long data into group 'a' and
# group 'b' and return the p-value of a two-sample t-test.
# NOTE(review): both subsets extract `$variable`, i.e. the factor of column
# labels, not the measurements. That matches the "argument is not numeric"
# and "var(x) on a factor" warnings reported for this call; the numbers are
# presumably in the `value` column of the melted data.
lapply(unique(melt_testdata$variable),function(x){
Good <- subset(melt_testdata, W == 'a' & variable ==x)$variable
Bad <- subset(melt_testdata, W == 'b' & variable ==x)$variable
t.test(Good,Bad)$p.value
})
但我没有得到 t.test 结果,而是收到以下错误消息:
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") :
missing value where TRUE/FALSE needed In addition: Warning messages:
1: In mean.default(x) : argument is not numeric or logical: returning NA
2: In var(x) :
Calling var(x) on a factor x is deprecated and will become an error.
Use something like 'all(duplicated(x)[-1L])' to test for a constant vector.
3: In mean.default(y) : argument is not numeric or logical: returning NA
4: In var(y) :
Calling var(x) on a factor x is deprecated and will become an error.
Use something like 'all(duplicated(x)[-1L])' to test for a constant vector.
然后我尝试写循环(第一次..)
# Second attempt: loop over columns 2:4 of test_data and t-test group 'a'
# against group 'b' for each column.
# 50 x 1 matrices filled with NA, used as per-column holders.
good <- matrix(,50)
bad <- matrix(,50)
cnt=3
# out has length cnt (3), but the loop below writes positions 2:4, so out[1]
# keeps its initial 0 and a fourth element is appended.
out <- rep(0,cnt)
for (i in 2:4){
# NOTE(review): `select =` is given the column's values (test_data[,i])
# rather than a column name or index, so those numbers are used as column
# subscripts -- consistent with the "negative subscripts" error reported.
good[i] <- subset(test_data, W == 'a', select= test_data[,i])
bad[i] <- subset(test_data, W == 'b', select= test_data[,i])
# print() returns its argument, so the p-value is both printed and stored.
out[i] <- print(t.test(good[[i]], bad[[i]])$p.value)
}
仍然没有得到 p.values ......
这是错误消息
Error in x[j] : only 0's may be mixed with negative subscripts
感谢任何帮助,谢谢!
我认为使用 t.test 的 formula 方法会更顺利。可以试试:
library(broom)
library(magrittr)
library(dplyr)
# Toy data set: 50 rows with a two-level group label W that alternates
# "a", "b", ... and three normally distributed measurement columns.
W <- rep(c("a", "b"), times = 25)
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
test_data <- data.frame(W = W, X = X, Y = Y, Z = Z)
# Run one two-sample t-test per measurement column, splitting each column by
# the grouping vector W through t.test()'s formula interface, then turn every
# result into a one-row data frame with tidy().
# bind_rows(.id = ) records the list names (X, Y, Z) in `variable` directly,
# so the labels do not depend on how rbind() propagates row names.
lapply(
  test_data[c("X", "Y", "Z")],
  function(x, y) t.test(x ~ y),
  y = test_data[["W"]]
) %>%
  lapply(tidy) %>%
  bind_rows(.id = "variable")
编辑:
如果严格遵循 dplyr 的理念,可以使用下面的写法,实际上看起来更简洁一些:
library(broom)
library(dplyr)
library(tidyr)
# Toy data set: 50 rows with a two-level group label W that alternates
# "a", "b", ... and three normally distributed measurement columns.
W <- rep(c("a", "b"), times = 25)
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
test_data <- data.frame(W = W, X = X, Y = Y, Z = Z)
# Reshape to long format, then run one t-test (value by W) per original
# column and collect the tidy() summaries into a single table.
# pivot_longer() and group_modify() replace the superseded gather() and do();
# ungroup() makes sure the result is not left grouped.
test_data %>%
  pivot_longer(cols = X:Z, names_to = "variable", values_to = "value") %>%
  group_by(variable) %>%
  group_modify(function(group_data, key) {
    tidy(t.test(value ~ W, data = group_data))
  }) %>%
  ungroup()
这是一个使用 dplyr 和 t.test 的公式参数的解决方案。do 对 group_by 定义的每个组分别起作用;glance 从 t.test 的输出中提取各项数值,并将它们整理成一个 data.frame。
library(tidyverse)
library(broom)
# One t-test (value by W) per level of `variable`; glance() condenses each
# test result into a single summary row. Inside do(), `.` is the data of the
# current group.
do(
  group_by(melt_testdata, variable),
  glance(t.test(value ~ W, data = .))
)
我想对一个 data.frame 执行 t 检验(并获得 p.value):其中一列是分组(好与坏),其余各列都是数值。
我在这里生成了一个玩具数据集:
# Toy data set: 50 rows with a two-level group label W that alternates
# "a", "b", ... and three normally distributed measurement columns.
W <- rep(c("a", "b"), times = 25)
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
test_data <- data.frame(W = W, X = X, Y = Y, Z = Z)
然后我将数据转换成长格式:
# Reshape to long format: one row per (W, variable, value) triple.
# melt() is not base R, so it is namespace-qualified here; id.vars is given
# explicitly instead of letting melt() guess the id column and emit a message.
melt_testdata <- reshape2::melt(test_data, id.vars = "W")
并执行了t.test
# For each level of `variable`, split the long data into group 'a' and
# group 'b' and return the p-value of a two-sample t-test.
# NOTE(review): both subsets extract `$variable`, i.e. the factor of column
# labels, not the measurements. That matches the "argument is not numeric"
# and "var(x) on a factor" warnings reported for this call; the numbers are
# presumably in the `value` column of the melted data.
lapply(unique(melt_testdata$variable),function(x){
Good <- subset(melt_testdata, W == 'a' & variable ==x)$variable
Bad <- subset(melt_testdata, W == 'b' & variable ==x)$variable
t.test(Good,Bad)$p.value
})
但我没有得到 t.test 结果,而是收到以下错误消息:
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") :
missing value where TRUE/FALSE needed In addition: Warning messages:
1: In mean.default(x) : argument is not numeric or logical: returning NA
2: In var(x) :
Calling var(x) on a factor x is deprecated and will become an error.
Use something like 'all(duplicated(x)[-1L])' to test for a constant vector.
3: In mean.default(y) : argument is not numeric or logical: returning NA
4: In var(y) :
Calling var(x) on a factor x is deprecated and will become an error.
Use something like 'all(duplicated(x)[-1L])' to test for a constant vector.
然后我尝试写循环(第一次..)
# Second attempt: loop over columns 2:4 of test_data and t-test group 'a'
# against group 'b' for each column.
# 50 x 1 matrices filled with NA, used as per-column holders.
good <- matrix(,50)
bad <- matrix(,50)
cnt=3
# out has length cnt (3), but the loop below writes positions 2:4, so out[1]
# keeps its initial 0 and a fourth element is appended.
out <- rep(0,cnt)
for (i in 2:4){
# NOTE(review): `select =` is given the column's values (test_data[,i])
# rather than a column name or index, so those numbers are used as column
# subscripts -- consistent with the "negative subscripts" error reported.
good[i] <- subset(test_data, W == 'a', select= test_data[,i])
bad[i] <- subset(test_data, W == 'b', select= test_data[,i])
# print() returns its argument, so the p-value is both printed and stored.
out[i] <- print(t.test(good[[i]], bad[[i]])$p.value)
}
仍然没有得到 p.values ...... 这是错误消息
Error in x[j] : only 0's may be mixed with negative subscripts
感谢任何帮助,谢谢!
我认为使用 t.test 的 formula 方法会更顺利。可以试试:
library(broom)
library(magrittr)
library(dplyr)
# Toy data set: 50 rows with a two-level group label W that alternates
# "a", "b", ... and three normally distributed measurement columns.
W <- rep(c("a", "b"), times = 25)
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
test_data <- data.frame(W = W, X = X, Y = Y, Z = Z)
# Run one two-sample t-test per measurement column, splitting each column by
# the grouping vector W through t.test()'s formula interface, then turn every
# result into a one-row data frame with tidy().
# bind_rows(.id = ) records the list names (X, Y, Z) in `variable` directly,
# so the labels do not depend on how rbind() propagates row names.
lapply(
  test_data[c("X", "Y", "Z")],
  function(x, y) t.test(x ~ y),
  y = test_data[["W"]]
) %>%
  lapply(tidy) %>%
  bind_rows(.id = "variable")
编辑:
如果严格遵循 dplyr 的理念,可以使用下面的写法,实际上看起来更简洁一些:
library(broom)
library(dplyr)
library(tidyr)
# Toy data set: 50 rows with a two-level group label W that alternates
# "a", "b", ... and three normally distributed measurement columns.
W <- rep(c("a", "b"), times = 25)
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
test_data <- data.frame(W = W, X = X, Y = Y, Z = Z)
# Reshape to long format, then run one t-test (value by W) per original
# column and collect the tidy() summaries into a single table.
# pivot_longer() and group_modify() replace the superseded gather() and do();
# ungroup() makes sure the result is not left grouped.
test_data %>%
  pivot_longer(cols = X:Z, names_to = "variable", values_to = "value") %>%
  group_by(variable) %>%
  group_modify(function(group_data, key) {
    tidy(t.test(value ~ W, data = group_data))
  }) %>%
  ungroup()
这是一个使用 dplyr 和 t.test 的公式参数的解决方案。do 对 group_by 定义的每个组分别起作用;glance 从 t.test 的输出中提取各项数值,并将它们整理成一个 data.frame。
library(tidyverse)
library(broom)
# One t-test (value by W) per level of `variable`; glance() condenses each
# test result into a single summary row. Inside do(), `.` is the data of the
# current group.
do(
  group_by(melt_testdata, variable),
  glance(t.test(value ~ W, data = .))
)