从 Anova 输出(car 包)中提取 P 值列
Extracting P-value column from output Anova (car package)
我正在使用 'car' 包函数 Anova 进行一些统计测试。
它给出以下输出:
Y = cbind(curdata$V1, curdata$V2, curdata$V3)
mymdl = lm(Y ~ curdata$V4 + curdata$V5)
myanova = Anova(mymdl)
Type II MANOVA Tests: Pillai test statistic
Df test stat approx F num Df den Df Pr(>F)
curdata$V4 1 0.27941 2.9728 3 23 0.05280 .
curdata$V5 1 0.33570 3.8743 3 23 0.02228 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
我想提取 'Pr(>F)' 列中的值,这样我就可以将这些 p 值放在另一个矩阵中,以便以后进行多重比较校正。
我已经尝试使用 unlist,但它仍然没有提供列中找到的 p 值。
如有任何帮助,我们将不胜感激。
如果我们有多个响应变量,那就是 MANOVA。我们可以捕获输出并使用正则表达式(regex)提取 p 值:
# Capture the printed Anova table and pull the trailing p-value out of each
# term row with a regex. Backslashes must be doubled inside an R string
# literal: "\s" and "\d" are unrecognized escapes (parse error), and "\1"
# would be a control character rather than a backreference.
# Lines 4:5 of the captured output are the two term rows for this model;
# adjust the index when the model has a different number of terms.
as.numeric(sub(".*\\s*(\\d+\\.[0-9e-]+)\\s*[*.]*", "\\1", capture.output(out)[4:5]))
#[1] 8.836e-06 2.200e-16
数据
mymdl <- lm(cbind(Sepal.Length, Sepal.Width) ~ Petal.Width +
Petal.Length, data = iris)
out <- Anova(mymdl)
这也许不是最实用的方法,但您可以使用 tidyr 中的 separate() 把捕获的输出按列拆分:
library(car)
library(dplyr)
library(tidyr)
#Code
# Capture the printed Anova table as a one-column data frame of text lines.
v1 <- data.frame(capture.output(myanova))
# Keep the column-header line and the two term rows (lines 3:5 of the output).
v1 <- v1[3:5, , drop = FALSE]
names(v1) <- "v1"
# Split every line on single whitespace characters into 21 pieces. The regex
# needs a doubled backslash inside an R string ("\\s"); "\s" does not parse.
v2 <- separate(v1, v1, paste0("v", 1:21), sep = "\\s")
# Drop the header line so only the term rows remain.
v2 <- v2[-1, ]
输出:
as.numeric(v2$v21)
[1] 8.836e-06 2.200e-16
警告:如果捕获的输出中包含更多列,则需要相应地调整 1:21。
TLDR:
# Define helper: copy car's print method for multivariate Anova objects and
# drop the last two statements of its body, so the computed table becomes the
# function's value instead of only being printed.
get_summary_for_print <- car:::print.Anova.mlm
body(get_summary_for_print) <- local({tmp <- body(get_summary_for_print);tmp[-(length(tmp)-(0:1))]})
# Use it: extract the p-value column from the returned table.
get_summary_for_print(Anova(mymdl))$`Pr(>F)`
不幸的是,没有专门的方法可以做到这一点。但是您可以查看 car:::print.Anova.mlm 的源代码(在 R 控制台中键入该名称即可)以了解它如何计算您想要的值:
function (x, ...)
{
if ((!is.null(x$singular)) && x$singular)
stop("singular error SSP matrix; multivariate tests unavailable\ntry summary(object, multivariate=FALSE)")
test <- x$test
repeated <- x$repeated
ntests <- length(x$terms)
tests <- matrix(NA, ntests, 4)
if (!repeated)
SSPE.qr <- qr(x$SSPE)
for (term in 1:ntests) {
eigs <- Re(eigen(qr.coef(if (repeated) qr(x$SSPE[[term]]) else SSPE.qr,
x$SSP[[term]]), symmetric = FALSE)$values)
tests[term, 1:4] <- switch(test, Pillai = Pillai(eigs,
x$df[term], x$error.df), Wilks = Wilks(eigs, x$df[term],
x$error.df), `Hotelling-Lawley` = HL(eigs, x$df[term],
x$error.df), Roy = Roy(eigs, x$df[term], x$error.df))
}
ok <- tests[, 2] >= 0 & tests[, 3] > 0 & tests[, 4] > 0
ok <- !is.na(ok) & ok
tests <- cbind(x$df, tests, pf(tests[ok, 2], tests[ok, 3],
tests[ok, 4], lower.tail = FALSE))
rownames(tests) <- x$terms
colnames(tests) <- c("Df", "test stat", "approx F", "num Df",
"den Df", "Pr(>F)")
tests <- structure(as.data.frame(tests), heading = paste("\nType ",
x$type, if (repeated)
" Repeated Measures", " MANOVA Tests: ", test, " test statistic",
sep = ""), class = c("anova", "data.frame"))
print(tests, ...)
invisible(x)
}
<bytecode: 0x56032ea80990>
<environment: namespace:car>
在这种情况下,计算 p 值涉及相当多的代码行。但是,我们可以轻松地创建 print 函数的修改版本,让它返回表格(tests),而不是仅打印它(print(tests, ...))并返回原始对象(invisible(x)):
# Build a variant of car's print method that yields the test table as its value.
get_summary_for_print <- car:::print.Anova.mlm # copy the original print function (including its environment)
body(get_summary_for_print) <- # replace the body of our copy
local({ # local() keeps the temporary `tmp` out of the calling environment
tmp <- body(get_summary_for_print) # fetch the body once instead of repeating the call
tmp[-(length(tmp)-(0:1))] # drop the last two statements: print(tests, ...) and invisible(x)
})
# The last remaining statement is the assignment to `tests`, so the table is
# returned invisibly; wrap the call in print()/str() or index it to see it.
并像这样使用它:
library(car)
#> Loading required package: carData
res <- Anova(lm(cbind(Sepal.Width, Sepal.Length, Petal.Width) ~ Species + Petal.Length, iris))
res
#>
#> Type II MANOVA Tests: Pillai test statistic
#> Df test stat approx F num Df den Df Pr(>F)
#> Species 2 0.70215 26.149 6 290 < 2.2e-16 ***
#> Petal.Length 1 0.63487 83.461 3 144 < 2.2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
str(get_summary_for_print(res))
#> Classes 'anova' and 'data.frame': 2 obs. of 6 variables:
#> $ Df : num 2 1
#> $ test stat: num 0.702 0.635
#> $ approx F : num 26.1 83.5
#> $ num Df : num 6 3
#> $ den Df : num 290 144
#> $ Pr(>F) : num 7.96e-25 2.41e-31
#> - attr(*, "heading")= chr "\nType II MANOVA Tests: Pillai test statistic"
我正在使用 'car' 包函数 Anova 进行一些统计测试。
它给出以下输出:
Y = cbind(curdata$V1, curdata$V2, curdata$V3)
mymdl = lm(Y ~ curdata$V4 + curdata$V5)
myanova = Anova(mymdl)
Type II MANOVA Tests: Pillai test statistic
Df test stat approx F num Df den Df Pr(>F)
curdata$V4 1 0.27941 2.9728 3 23 0.05280 .
curdata$V5 1 0.33570 3.8743 3 23 0.02228 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
我想提取 'Pr(>F)' 列中的值,这样我就可以将这些 p 值放在另一个矩阵中,以便以后进行多重比较校正。
我已经尝试使用 unlist,但它仍然没有提供列中找到的 p 值。
如有任何帮助,我们将不胜感激。
如果我们有多个响应变量,那就是 MANOVA。我们可以捕获输出并使用正则表达式(regex)提取 p 值:
# Capture the printed Anova table and pull the trailing p-value out of each
# term row with a regex. Backslashes must be doubled inside an R string
# literal: "\s" and "\d" are unrecognized escapes (parse error), and "\1"
# would be a control character rather than a backreference.
# Lines 4:5 of the captured output are the two term rows for this model;
# adjust the index when the model has a different number of terms.
as.numeric(sub(".*\\s*(\\d+\\.[0-9e-]+)\\s*[*.]*", "\\1", capture.output(out)[4:5]))
#[1] 8.836e-06 2.200e-16
数据
mymdl <- lm(cbind(Sepal.Length, Sepal.Width) ~ Petal.Width +
Petal.Length, data = iris)
out <- Anova(mymdl)
这也许不是最实用的方法,但您可以使用 tidyr 中的 separate() 把捕获的输出按列拆分:
library(car)
library(dplyr)
library(tidyr)
#Code
# Capture the printed Anova table as a one-column data frame of text lines.
v1 <- data.frame(capture.output(myanova))
# Keep the column-header line and the two term rows (lines 3:5 of the output).
v1 <- v1[3:5, , drop = FALSE]
names(v1) <- "v1"
# Split every line on single whitespace characters into 21 pieces. The regex
# needs a doubled backslash inside an R string ("\\s"); "\s" does not parse.
v2 <- separate(v1, v1, paste0("v", 1:21), sep = "\\s")
# Drop the header line so only the term rows remain.
v2 <- v2[-1, ]
输出:
as.numeric(v2$v21)
[1] 8.836e-06 2.200e-16
警告:如果捕获的输出中包含更多列,则需要相应地调整 1:21。
TLDR:
# Define helper: copy car's print method for multivariate Anova objects and
# drop the last two statements of its body, so the computed table becomes the
# function's value instead of only being printed.
get_summary_for_print <- car:::print.Anova.mlm
body(get_summary_for_print) <- local({tmp <- body(get_summary_for_print);tmp[-(length(tmp)-(0:1))]})
# Use it: extract the p-value column from the returned table.
get_summary_for_print(Anova(mymdl))$`Pr(>F)`
不幸的是,没有专门的方法可以做到这一点。但是您可以查看 car:::print.Anova.mlm 的源代码(在 R 控制台中键入该名称即可)以了解它如何计算您想要的值:
function (x, ...)
{
if ((!is.null(x$singular)) && x$singular)
stop("singular error SSP matrix; multivariate tests unavailable\ntry summary(object, multivariate=FALSE)")
test <- x$test
repeated <- x$repeated
ntests <- length(x$terms)
tests <- matrix(NA, ntests, 4)
if (!repeated)
SSPE.qr <- qr(x$SSPE)
for (term in 1:ntests) {
eigs <- Re(eigen(qr.coef(if (repeated) qr(x$SSPE[[term]]) else SSPE.qr,
x$SSP[[term]]), symmetric = FALSE)$values)
tests[term, 1:4] <- switch(test, Pillai = Pillai(eigs,
x$df[term], x$error.df), Wilks = Wilks(eigs, x$df[term],
x$error.df), `Hotelling-Lawley` = HL(eigs, x$df[term],
x$error.df), Roy = Roy(eigs, x$df[term], x$error.df))
}
ok <- tests[, 2] >= 0 & tests[, 3] > 0 & tests[, 4] > 0
ok <- !is.na(ok) & ok
tests <- cbind(x$df, tests, pf(tests[ok, 2], tests[ok, 3],
tests[ok, 4], lower.tail = FALSE))
rownames(tests) <- x$terms
colnames(tests) <- c("Df", "test stat", "approx F", "num Df",
"den Df", "Pr(>F)")
tests <- structure(as.data.frame(tests), heading = paste("\nType ",
x$type, if (repeated)
" Repeated Measures", " MANOVA Tests: ", test, " test statistic",
sep = ""), class = c("anova", "data.frame"))
print(tests, ...)
invisible(x)
}
<bytecode: 0x56032ea80990>
<environment: namespace:car>
在这种情况下,计算 p 值涉及相当多的代码行。但是,我们可以轻松地创建 print 函数的修改版本,让它返回表格(tests),而不是仅打印它(print(tests, ...))并返回原始对象(invisible(x)):
# Build a variant of car's print method that yields the test table as its value.
get_summary_for_print <- car:::print.Anova.mlm # copy the original print function (including its environment)
body(get_summary_for_print) <- # replace the body of our copy
local({ # local() keeps the temporary `tmp` out of the calling environment
tmp <- body(get_summary_for_print) # fetch the body once instead of repeating the call
tmp[-(length(tmp)-(0:1))] # drop the last two statements: print(tests, ...) and invisible(x)
})
# The last remaining statement is the assignment to `tests`, so the table is
# returned invisibly; wrap the call in print()/str() or index it to see it.
并像这样使用它:
library(car)
#> Loading required package: carData
res <- Anova(lm(cbind(Sepal.Width, Sepal.Length, Petal.Width) ~ Species + Petal.Length, iris))
res
#>
#> Type II MANOVA Tests: Pillai test statistic
#> Df test stat approx F num Df den Df Pr(>F)
#> Species 2 0.70215 26.149 6 290 < 2.2e-16 ***
#> Petal.Length 1 0.63487 83.461 3 144 < 2.2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
str(get_summary_for_print(res))
#> Classes 'anova' and 'data.frame': 2 obs. of 6 variables:
#> $ Df : num 2 1
#> $ test stat: num 0.702 0.635
#> $ approx F : num 26.1 83.5
#> $ num Df : num 6 3
#> $ den Df : num 290 144
#> $ Pr(>F) : num 7.96e-25 2.41e-31
#> - attr(*, "heading")= chr "\nType II MANOVA Tests: Pillai test statistic"