group_by %>% anova_test DFn DFd 不一致
group_by %>% anova_test inconsistency in DFn DFd
我有一个不平衡的重复测量设计,我想 运行 为每个时间段(即曲线)分开方差分析,然后 Bonferroni 校正结果。
这是数据,其中 Curve 是重复测量:
T_data <-structure(list(mod_id = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,
1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6,
7, 8, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,
1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6,
7, 8, 1, 2, 3, 4, 5, 6, 7, 8), Curve = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L), .Label = c("First", "Second", "Third", "Fourth", "Fifth",
"Sixth"), class = "factor"), Treatment = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("GH", "T1", "T2", "T3"), class = "factor"),
Topt = c(28.85, 29.83, 29.89, 28.26, 29.2, 29.1, 31.06, 32.24,
33.03, 31.1, 32.51, 31.91, 31.42, 31.92, 32.02, 33.75, 32.87,
32.76, 28.15, 28.2, 30.89, 29.62, 29.74, 29.36, 29.41, 28.36,
29.41, 32.53, 33.03, 31.44, 31.15, 32.15, 32.87, 30.79, 32.75,
32.75, 35.02, 30.34, 33.68, 35.01, 32.61, 31.16, 32.11, 30.28,
30, 31.86, 29.49, 28.96, 31.29, 32.11, 30.98, 31.92, 31.41,
31.09, 32.9, 32.54, 33.16, 33.99, 34.18, 34.14, 28.67, 26.96,
27.9, 24.8, 30.76, 28.56, 29.05, 27.08, 29.32, 32.96, 34.25,
34.25, 32.17, 31.4, 31.09, 34.68, 33.65, 33.96, 33.04, 33.12,
34, 33.18, 34.3, 34.46, 34.02), A_at_Topt = c(20.36, 18.25,
18.62, 15.51, 21.39, 16.95, 21.73, 14.43, 16.29, 16.52, 17.65,
18.68, 22.13, 21.77, 20.97, 17.75, 19.83, 18.32, 12.6, 17.72,
16.91, 19.22, 19.05, 20.49, 16.36, 16.81, 16.48, 21.29, 19.92,
18.2, 16.09, 21.56, 19.56, 17.09, 16.71, 20.65, 20.2, 25.19,
21.46, 22.63, 22.18, 21.9, 17.86, 16.34, 17.85, 16.25, 20.65,
22.92, 19.16, 17.77, 19.5, 20.1, 21.5, 24.58, 22.88, 14.97,
20.52, 22.77, 19.96, 16.91, 17.82, 18, 13.13, 16.43, 13.09,
11.07, 7.2, 12.87, 12.99, 17.28, 17.04, 21.78, 19.2, 16.42,
18.35, 12.51, 18.72, 17.01, 17.75, 19.62, 19.28, 15.32, 19.24,
17.22, 17.6)), row.names = c(NA, -85L), class = c("tbl_df",
"tbl", "data.frame"))
我遇到的这个问题与 'rstatix' 包和 anova_test() 函数有关。 运行对于 Topt 变量来说刚刚好。
library(rstatix)
Topt_bonf <- T_data %>%
group_by(Curve) %>%
anova_test(dv = Topt, wid = mod_id, within = Treatment) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni")
Topt_bonf
这给出:
Curve
DFn
Dfd
Third
2
10
Fourth
2
12
Fifth
2
10
Sixth
2
14
但是,相同的代码对 Aopt 变量给出了一个奇怪的结果,其中 DFn 和 DFd 对于 Curve = "Fifth" 是不正确的。
Aopt_bonf <- T_data %>%
group_by(Curve) %>%
anova_test(dv = A_at_Topt, wid = mod_id, within = Treatment) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni")
Aopt_bonf
这给出:
Curve
DFn
Dfd
Third
2
10
Fourth
2
12
Fifth
1.07
5.35
Sixth
2
14
有什么想法吗?谢谢
问题是 anova_test() 函数没有公式参数,所以它变得混乱了。
使用 dv~var lm 符号解决了这个问题。
见下文post:
Aopt_bonf <- T_data %>%
group_by(Curve) %>%
anova_test(A_at_Topt~Treatment, wid = mod_id, within = Treatment) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni")
Aopt_bonf
您看到的问题不是使用 group_by()
的结果。相反,这是由于 Greenhouse-Geisser 修正。所以,让我们取 'T_data' 的一个子集来简化。下面我们来看数据
曲线的子集 == 'Fifth'.
# subset
T_data_sub <- T_data[T_data$Curve %in% "Fifth",]
将此与您的原始代码一起使用,anova_test()
正在执行 III 型方差分析,
基于 car::Anova()
函数。
> anova_test(data = T_data_sub, dv = A_at_Topt , wid = mod_id, within = Treatment)
ANOVA Table (type III tests)
$ANOVA
Effect DFn DFd F p p<.05 ges
1 Treatment 2 10 0.726 0.508 0.079
$`Mauchly's Test for Sphericity`
Effect W p p<.05
1 Treatment 0.131 0.017 *
$`Sphericity Corrections` # HERE IS WHERE THE DIFFERENT DEGREES OF FREEDOM COME FROM
Effect GGe DF[GG] p[GG] p[GG]<.05 HFe DF[HF] p[HF] p[HF]<.05
1 Treatment 0.535 1.07, 5.35 0.44 0.562 1.12, 5.62 0.446
因为 Mauchly 球形检验的 p 值很重要,所以使用 Greenhouse-Geisser 校正调整了自由度。这就是为什么您的第二个屏幕截图与第一个屏幕截图具有不同的自由度。
如果您改为指定公式来指定模型,anova_test()
会根据 stats:aov() 执行 II 型方差分析。
> anova_test(data = T_data_sub, A_at_Topt ~ Treatment, wid = mod_id, within = Treatment)
Coefficient covariances computed by hccm()
ANOVA Table (type II tests)
Effect DFn DFd F p p<.05 ges
1 Treatment 2 15 0.642 0.54 0.079
请注意,在这种情况下,wid
和 within
参数不会影响使用公式时的输出。它只是在进行单向方差分析。
> anova_test(data = T_data_sub, A_at_Topt ~ Treatment)
Coefficient covariances computed by hccm()
ANOVA Table (type II tests)
Effect DFn DFd F p p<.05 ges
1 Treatment 2 15 0.642 0.54 0.079
我有一个不平衡的重复测量设计,我想 运行 为每个时间段(即曲线)分开方差分析,然后 Bonferroni 校正结果。
这是数据,其中 Curve 是重复测量:
T_data <-structure(list(mod_id = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,
1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6,
7, 8, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,
1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6,
7, 8, 1, 2, 3, 4, 5, 6, 7, 8), Curve = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L), .Label = c("First", "Second", "Third", "Fourth", "Fifth",
"Sixth"), class = "factor"), Treatment = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("GH", "T1", "T2", "T3"), class = "factor"),
Topt = c(28.85, 29.83, 29.89, 28.26, 29.2, 29.1, 31.06, 32.24,
33.03, 31.1, 32.51, 31.91, 31.42, 31.92, 32.02, 33.75, 32.87,
32.76, 28.15, 28.2, 30.89, 29.62, 29.74, 29.36, 29.41, 28.36,
29.41, 32.53, 33.03, 31.44, 31.15, 32.15, 32.87, 30.79, 32.75,
32.75, 35.02, 30.34, 33.68, 35.01, 32.61, 31.16, 32.11, 30.28,
30, 31.86, 29.49, 28.96, 31.29, 32.11, 30.98, 31.92, 31.41,
31.09, 32.9, 32.54, 33.16, 33.99, 34.18, 34.14, 28.67, 26.96,
27.9, 24.8, 30.76, 28.56, 29.05, 27.08, 29.32, 32.96, 34.25,
34.25, 32.17, 31.4, 31.09, 34.68, 33.65, 33.96, 33.04, 33.12,
34, 33.18, 34.3, 34.46, 34.02), A_at_Topt = c(20.36, 18.25,
18.62, 15.51, 21.39, 16.95, 21.73, 14.43, 16.29, 16.52, 17.65,
18.68, 22.13, 21.77, 20.97, 17.75, 19.83, 18.32, 12.6, 17.72,
16.91, 19.22, 19.05, 20.49, 16.36, 16.81, 16.48, 21.29, 19.92,
18.2, 16.09, 21.56, 19.56, 17.09, 16.71, 20.65, 20.2, 25.19,
21.46, 22.63, 22.18, 21.9, 17.86, 16.34, 17.85, 16.25, 20.65,
22.92, 19.16, 17.77, 19.5, 20.1, 21.5, 24.58, 22.88, 14.97,
20.52, 22.77, 19.96, 16.91, 17.82, 18, 13.13, 16.43, 13.09,
11.07, 7.2, 12.87, 12.99, 17.28, 17.04, 21.78, 19.2, 16.42,
18.35, 12.51, 18.72, 17.01, 17.75, 19.62, 19.28, 15.32, 19.24,
17.22, 17.6)), row.names = c(NA, -85L), class = c("tbl_df",
"tbl", "data.frame"))
我遇到的这个问题与 'rstatix' 包和 anova_test() 函数有关。 运行对于 Topt 变量来说刚刚好。
library(rstatix)
Topt_bonf <- T_data %>%
group_by(Curve) %>%
anova_test(dv = Topt, wid = mod_id, within = Treatment) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni")
Topt_bonf
这给出:
Curve | DFn | Dfd |
---|---|---|
Third | 2 | 10 |
Fourth | 2 | 12 |
Fifth | 2 | 10 |
Sixth | 2 | 14 |
但是,相同的代码对 Aopt 变量给出了一个奇怪的结果,其中 DFn 和 DFd 对于 Curve = "Fifth" 是不正确的。
Aopt_bonf <- T_data %>%
group_by(Curve) %>%
anova_test(dv = A_at_Topt, wid = mod_id, within = Treatment) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni")
Aopt_bonf
这给出:
Curve | DFn | Dfd |
---|---|---|
Third | 2 | 10 |
Fourth | 2 | 12 |
Fifth | 1.07 | 5.35 |
Sixth | 2 | 14 |
有什么想法吗?谢谢
问题是 anova_test() 函数没有公式参数,所以它变得混乱了。
使用 dv~var lm 符号解决了这个问题。
见下文post:
Aopt_bonf <- T_data %>%
group_by(Curve) %>%
anova_test(A_at_Topt~Treatment, wid = mod_id, within = Treatment) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni")
Aopt_bonf
您看到的问题不是使用 group_by()
的结果。相反,这是由于 Greenhouse-Geisser 修正。所以,让我们取 'T_data' 的一个子集来简化。下面我们来看数据
曲线的子集 == 'Fifth'.
# subset
T_data_sub <- T_data[T_data$Curve %in% "Fifth",]
将此与您的原始代码一起使用,anova_test()
正在执行 III 型方差分析,
基于 car::Anova()
函数。
> anova_test(data = T_data_sub, dv = A_at_Topt , wid = mod_id, within = Treatment)
ANOVA Table (type III tests)
$ANOVA
Effect DFn DFd F p p<.05 ges
1 Treatment 2 10 0.726 0.508 0.079
$`Mauchly's Test for Sphericity`
Effect W p p<.05
1 Treatment 0.131 0.017 *
$`Sphericity Corrections` # HERE IS WHERE THE DIFFERENT DEGREES OF FREEDOM COME FROM
Effect GGe DF[GG] p[GG] p[GG]<.05 HFe DF[HF] p[HF] p[HF]<.05
1 Treatment 0.535 1.07, 5.35 0.44 0.562 1.12, 5.62 0.446
因为 Mauchly 球形检验的 p 值很重要,所以使用 Greenhouse-Geisser 校正调整了自由度。这就是为什么您的第二个屏幕截图与第一个屏幕截图具有不同的自由度。
如果您改为指定公式来指定模型,anova_test()
会根据 stats:aov() 执行 II 型方差分析。
> anova_test(data = T_data_sub, A_at_Topt ~ Treatment, wid = mod_id, within = Treatment)
Coefficient covariances computed by hccm()
ANOVA Table (type II tests)
Effect DFn DFd F p p<.05 ges
1 Treatment 2 15 0.642 0.54 0.079
请注意,在这种情况下,wid
和 within
参数不会影响使用公式时的输出。它只是在进行单向方差分析。
> anova_test(data = T_data_sub, A_at_Topt ~ Treatment)
Coefficient covariances computed by hccm()
ANOVA Table (type II tests)
Effect DFn DFd F p p<.05 ges
1 Treatment 2 15 0.642 0.54 0.079