从 post 个临时测试中获得整洁的输出
Getting tidy output from post hoc tests
考虑这个数据框 dat1
:
dat1 <- data.frame(Region = rep(c("r1","r2"), each = 100),
State = rep(c("NY","MA","FL","GA"), each = 10),
Loc = rep(c("a","b","c","d","e","f","g","h"),each = 5),
ID = rep(c(1:10), each=2),
var1 = rnorm(200),
var2 = rnorm(200),
var3 = rnorm(200),
var4 = rnorm(200),
var5 = rnorm(200))
我有类似于上面创建的 dat1
的数据框。 Region
、State
和 Loc
是每个观察值 ID
的分组变量,每个观察值 var1:var5
进行 5 次测量。对于每个分组变量,我对每个 var
进行单变量方差分析。当发现显着差异时,我使用 TukeyHSD()
函数和 multcompView
包中的 multcompLetters()
函数在组上生成 CLD。因为我想为每个分组变量执行此操作,所以我试图编写一个函数来防止自己重复和打错字。下面显示了我对此的看法:
library(tidyverse)
library(multcomp)
library(multcompView)
Tuk <- function(dat,groupvar,var){
TUK <- TukeyHSD(aov(lm(get(var) ~ get(groupvar), data=dat)))
names(TUK)[[1]] <- paste0(groupvar)
lets<-multcompLetters(extract_p(TUK$groupvar))
lets
}
#assuming all 5 vars were significant in the anovas, I would then run this for each grouping variable as follows:
vars <- paste0(names(dat1[,5:9]))
#by Region
lapply(vars, FUN=Tuk, dat=dat1, groupvar="Region")
#by State
lapply(vars, FUN=Tuk, dat=dat1, groupvar="State")
#by Loc
lapply(vars, FUN=Tuk, dat=dat1, groupvar="Loc")
代码在函数之外工作。该函数将创建模型,但我无法弄清楚如何格式化它以便识别 multcompLetters(extract_p())
部分的 groupvar
是什么?我怎样才能解决这个问题,我怎样才能让它输出一个整洁的 table 的函数,它显示每个组和我一次给它的每个变量的字母。例如,State
使用所有 5 个变量
看起来像这样
NY MA FL GA
var1 a ab c a
var2 a ab b c
var3 a c ab bc
var4 ab c ab ab
var5 a b c b
此外,是否有合理的方法使此函数生成显示 CLD 字母的组的箱线图(针对每个变量)?
假设情节确实是您正在寻找的内容,这是否让您非常接近 var1 ~ State 的单曲情节,Indrajeet 在构建这个包方面做得很好,我讨厌重新发明轮子。
dat1 <- data.frame(Region = rep(c("r1","r2"), each = 100),
State = rep(c("NY","MA","FL","GA"), each = 10),
Loc = rep(c("a","b","c","d","e","f","g","h"),each = 5),
ID = rep(c(1:10), each=2),
var1 = rnorm(200),
var2 = rnorm(200),
var3 = rnorm(200),
var4 = rnorm(200),
var5 = rnorm(200))
library(ggstatsplot)
ggbetweenstats(dat1, State, var1,
plot.type = "box",
pairwise.comparisons = TRUE,
pairwise.display = "everything")
#> Note: Shapiro-Wilk Normality Test for var1: p-value = 0.183
#>
#> Note: Bartlett's test for homogeneity of variances for factor State: p-value = 0.373
#>
您接受了答案,但只要记录下来,您只需做一点工作就可以得到您最初提出的要求...
#
library(multcompView)
library(dplyr)
library(purrr)
library(tidyselect)
set.seed(1111)
dat1 <- data.frame(Region = rep(c("r1","r2"), each = 100),
State = rep(c("NY","MA","FL","GA"), each = 10),
Loc = rep(c("a","b","c","d","e","f","g","h"),each = 5),
ID = rep(c(1:10), each = 2),
var1 = rnorm(200),
var2 = rnorm(200),
var3 = rnorm(200),
var4 = rnorm(200),
var5 = rnorm(200))
# You want just the letters which you can get by ...
multcompLetters(TukeyHSD(aov(var1 ~ State, data = dat1))$State[,4])$Letters
#> GA MA NY FL
#> "a" "a" "a" "a"
# Your function redone...
Tuk3 <- function(data,
groupvar,
var) {
lst <- as.list(match.call())
if (is.symbol(lst$groupvar) || is.symbol(lst$var)) {
stop("Please quote all variables")
}
if (!is.call(groupvar)) {
grouplabel <- rlang::as_name(rlang::enquo(groupvar))
}
data <-
dplyr::select(
.data = data,
var = {{ var }},
groupvar = {{ groupvar }}
)
aov_object <- aov(var ~ groupvar, data = data)
aov_results <- broom::tidy(aov_object) %>%
mutate(term = if_else(term != "Residuals", grouplabel, term))
tukey_results <- broom::tidy(TukeyHSD(aov_object)) %>%
mutate(term = grouplabel)
# multcompLetters is annoying and wants named vectors
p_values <- tukey_results %>% pull(adj.p.value)
names(p_values) <- tukey_results %>% pull(comparison)
letters_results <- data.frame(as.list(multcompLetters(p_values)$Letters))
return(letters_results)
}
# works for one
Tuk3(data = dat1, groupvar = "State", var = "var1")
#> GA MA NY FL
#> 1 a a a a
# you could do this manually but I do it a lot so I have a function
variables_list <- CGPfunctions::cross2_var_vectors(dat1, 1:3, 5:9)
# Make the names nice
outcomes2 <- variables_list$lista
groupings2 <- variables_list$listb
names(groupings2) <- unlist(groupings2)
names(outcomes2) <- paste(unlist(outcomes2), "~", unlist(groupings2))
# get all 15 results and a final map_dfr to make one tibble
map2(.x = outcomes2,
.y = groupings2,
.f = ~ Tuk3(dat = dat1,
var = tidyselect::all_of(.x),
groupvar = tidyselect::all_of(.y))) %>%
map_dfr(~ rbind(.), .id = "Which_ANOVA")
#> Which_ANOVA r2 r1 GA MA NY FL b c d e f g
#> 1 var1 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 2 var2 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 3 var3 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 4 var4 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 5 var5 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 6 var1 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 7 var2 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 8 var3 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 9 var4 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 10 var5 ~ State <NA> <NA> a b ab ab <NA> <NA> <NA> <NA> <NA> <NA>
#> 11 var1 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 12 var2 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 13 var3 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 14 var4 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 15 var5 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> ab ab ab a ab b
我截了结果
考虑这个数据框 dat1
:
dat1 <- data.frame(Region = rep(c("r1","r2"), each = 100),
State = rep(c("NY","MA","FL","GA"), each = 10),
Loc = rep(c("a","b","c","d","e","f","g","h"),each = 5),
ID = rep(c(1:10), each=2),
var1 = rnorm(200),
var2 = rnorm(200),
var3 = rnorm(200),
var4 = rnorm(200),
var5 = rnorm(200))
我有类似于上面创建的 dat1
的数据框。 Region
、State
和 Loc
是每个观察值 ID
的分组变量,每个观察值 var1:var5
进行 5 次测量。对于每个分组变量,我对每个 var
进行单变量方差分析。当发现显着差异时,我使用 TukeyHSD()
函数和 multcompView
包中的 multcompLetters()
函数在组上生成 CLD。因为我想为每个分组变量执行此操作,所以我试图编写一个函数来防止自己重复和打错字。下面显示了我对此的看法:
library(tidyverse)
library(multcomp)
library(multcompView)
Tuk <- function(dat,groupvar,var){
TUK <- TukeyHSD(aov(lm(get(var) ~ get(groupvar), data=dat)))
names(TUK)[[1]] <- paste0(groupvar)
lets<-multcompLetters(extract_p(TUK$groupvar))
lets
}
#assuming all 5 vars were significant in the anovas, I would then run this for each grouping variable as follows:
vars <- paste0(names(dat1[,5:9]))
#by Region
lapply(vars, FUN=Tuk, dat=dat1, groupvar="Region")
#by State
lapply(vars, FUN=Tuk, dat=dat1, groupvar="State")
#by Loc
lapply(vars, FUN=Tuk, dat=dat1, groupvar="Loc")
代码在函数之外工作。该函数将创建模型,但我无法弄清楚如何格式化它以便识别 multcompLetters(extract_p())
部分的 groupvar
是什么?我怎样才能解决这个问题,我怎样才能让它输出一个整洁的 table 的函数,它显示每个组和我一次给它的每个变量的字母。例如,State
使用所有 5 个变量
NY MA FL GA
var1 a ab c a
var2 a ab b c
var3 a c ab bc
var4 ab c ab ab
var5 a b c b
此外,是否有合理的方法使此函数生成显示 CLD 字母的组的箱线图(针对每个变量)?
假设情节确实是您正在寻找的内容,这是否让您非常接近 var1 ~ State 的单曲情节,Indrajeet 在构建这个包方面做得很好,我讨厌重新发明轮子。
dat1 <- data.frame(Region = rep(c("r1","r2"), each = 100),
State = rep(c("NY","MA","FL","GA"), each = 10),
Loc = rep(c("a","b","c","d","e","f","g","h"),each = 5),
ID = rep(c(1:10), each=2),
var1 = rnorm(200),
var2 = rnorm(200),
var3 = rnorm(200),
var4 = rnorm(200),
var5 = rnorm(200))
library(ggstatsplot)
ggbetweenstats(dat1, State, var1,
plot.type = "box",
pairwise.comparisons = TRUE,
pairwise.display = "everything")
#> Note: Shapiro-Wilk Normality Test for var1: p-value = 0.183
#>
#> Note: Bartlett's test for homogeneity of variances for factor State: p-value = 0.373
#>
您接受了答案,但只要记录下来,您只需做一点工作就可以得到您最初提出的要求...
#
library(multcompView)
library(dplyr)
library(purrr)
library(tidyselect)
set.seed(1111)
dat1 <- data.frame(Region = rep(c("r1","r2"), each = 100),
State = rep(c("NY","MA","FL","GA"), each = 10),
Loc = rep(c("a","b","c","d","e","f","g","h"),each = 5),
ID = rep(c(1:10), each = 2),
var1 = rnorm(200),
var2 = rnorm(200),
var3 = rnorm(200),
var4 = rnorm(200),
var5 = rnorm(200))
# You want just the letters which you can get by ...
multcompLetters(TukeyHSD(aov(var1 ~ State, data = dat1))$State[,4])$Letters
#> GA MA NY FL
#> "a" "a" "a" "a"
# Your function redone...
Tuk3 <- function(data,
groupvar,
var) {
lst <- as.list(match.call())
if (is.symbol(lst$groupvar) || is.symbol(lst$var)) {
stop("Please quote all variables")
}
if (!is.call(groupvar)) {
grouplabel <- rlang::as_name(rlang::enquo(groupvar))
}
data <-
dplyr::select(
.data = data,
var = {{ var }},
groupvar = {{ groupvar }}
)
aov_object <- aov(var ~ groupvar, data = data)
aov_results <- broom::tidy(aov_object) %>%
mutate(term = if_else(term != "Residuals", grouplabel, term))
tukey_results <- broom::tidy(TukeyHSD(aov_object)) %>%
mutate(term = grouplabel)
# multcompLetters is annoying and wants named vectors
p_values <- tukey_results %>% pull(adj.p.value)
names(p_values) <- tukey_results %>% pull(comparison)
letters_results <- data.frame(as.list(multcompLetters(p_values)$Letters))
return(letters_results)
}
# works for one
Tuk3(data = dat1, groupvar = "State", var = "var1")
#> GA MA NY FL
#> 1 a a a a
# you could do this manually but I do it a lot so I have a function
variables_list <- CGPfunctions::cross2_var_vectors(dat1, 1:3, 5:9)
# Make the names nice
outcomes2 <- variables_list$lista
groupings2 <- variables_list$listb
names(groupings2) <- unlist(groupings2)
names(outcomes2) <- paste(unlist(outcomes2), "~", unlist(groupings2))
# get all 15 results and a final map_dfr to make one tibble
map2(.x = outcomes2,
.y = groupings2,
.f = ~ Tuk3(dat = dat1,
var = tidyselect::all_of(.x),
groupvar = tidyselect::all_of(.y))) %>%
map_dfr(~ rbind(.), .id = "Which_ANOVA")
#> Which_ANOVA r2 r1 GA MA NY FL b c d e f g
#> 1 var1 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 2 var2 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 3 var3 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 4 var4 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 5 var5 ~ Region a a <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 6 var1 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 7 var2 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 8 var3 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 9 var4 ~ State <NA> <NA> a a a a <NA> <NA> <NA> <NA> <NA> <NA>
#> 10 var5 ~ State <NA> <NA> a b ab ab <NA> <NA> <NA> <NA> <NA> <NA>
#> 11 var1 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 12 var2 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 13 var3 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 14 var4 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> a a a a a a
#> 15 var5 ~ Loc <NA> <NA> <NA> <NA> <NA> <NA> ab ab ab a ab b
我截了结果