对数据框中的各列运行方差分析
Running ANOVAs across columns within a dataframe
我有以下数据框:
# Example data frame with 30 rows, written as a structure() literal.
# Group.1 is an integer id running 1-10, repeated three times; BLC, BLG,
# LMB, RSF, GSF and CCF are numeric columns; design is a character column
# holding "random" (rows 1-10), "strat" (rows 11-20) and "hybrid" (rows 21-30).
df<- structure(list(Group.1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L), BLC = c(10.9890294366989, 7.31930466605672,
13.6185172644819, 2.7266530334015, 3.53565114908662, 7.20597804412166,
2.78164116828929, 7.59371098030222, 14.7343839844163, 2.9806259314456,
5.07619453154234, 6.88503820786366, 12.2882487654356, 1.40646976090014,
19.307342679726, 15.0249870253821, 4.34581364475618, 5.03491248395278,
7.94957003082448, 6.84343434343434, 11.2622383086214, 11.1839711729262,
4.7669094503789, 3.09762397594833, 8.10311438552811, 0, 0, 0,
0, 0), BLG = c(53.2196490874651, 23.9543988057977, 46.2826752583327,
34.9096813849679, 27.0376341826749, 49.2472186166963, 93.0631982759938,
46.1366527251764, 57.6460095990237, 36.5835422650789, 56.2627854701592,
30.1133129448127, 22.2997436361558, 28.9793549481134, 37.6201098690056,
59.8627031558285, 34.7171109184231, 48.9414623325316, 31.5061417556072,
21.2521546878513, 70.8263794749462, 24.930952093699, 39.307162693975,
28.9144148451338, 42.9157121339545, 3.94736842105263, 3.94736842105263,
3.94736842105263, 3.94736842105263, 3.94736842105263), LMB = c(75.2718187185061,
42.707035200077, 31.37371428004, 24.9294274297168, 21.619318105277,
19.8056309622509, 62.5665072062847, 30.2395840472535, 36.2246969501391,
16.053874321678, 73.325176836826, 32.1599744373439, 33.8619234899393,
39.1278597999347, 29.242123346214, 50.3372863653836, 23.3365756853847,
61.7018803213189, 18.2047745554517, 40.1231815267265, 36.2849916132823,
35.4393881210482, 41.6277079218274, 27.5840809362335, 14.5766262544513,
19.7368421052632, 19.7368421052632, 19.7368421052632, 19.7368421052632,
19.7368421052632), RSF = c(7.7061355134565, 6.57544180671257,
21.6485001821173, 14.3568910671585, 3.53565114908662, 2.89876366994815,
10.1680661480383, 17.3890884998598, 6.45810108311722, 2.95766439639045,
13.7591229373968, 21.1086837581149, 3.65965233302836, 26.151881306845,
9.17122695497959, 16.6545469585419, 8.26685329264933, 9.3745854643381,
1.4903129657228, 23.6018678125026, 7.04954072403232, 11.1546894959865,
20.5987856222152, 8.10190710702138, 4.41849566570698, 3.94736842105263,
3.94736842105263, 3.94736842105263, 3.94736842105263, 3.94736842105263
), GSF = c(0, 0, 0, 2.51341949455157, 8.34660636193077, 3.23362974939369,
2.85602204934611, 3.23362974939369, 3.63636363636364, 3.9344262295082,
0, 0, 3.9344262295082, 0, 1.46520146520147, 0, 0, 0, 3.9344262295082,
3.63636363636364, 1.46342316809685, 0.879120879120879, 0, 0,
2.36065573770492, 0, 0, 0, 0, 0), CCF = c(0, 0, 0, 3.14465408805032,
0, 0, 0, 0, 0, 0, 0, 0, 1.24223602484472, 0, 0, 0, 0, 0, 1.24223602484472,
0, 1.88679245283019, 0, 0, 0, 0, 0, 0, 0, 0, 0), design = c("random",
"random", "random", "random", "random", "random", "random", "random",
"random", "random", "strat", "strat", "strat", "strat", "strat",
"strat", "strat", "strat", "strat", "strat", "hybrid", "hybrid",
"hybrid", "hybrid", "hybrid", "hybrid", "hybrid", "hybrid", "hybrid",
"hybrid")), row.names = c(NA, -30L), class = "data.frame")
我需要以 design 变量为分组,对每一列分别运行方差分析。例如,我希望对物种 BLG 进行方差分析,检验各个设计之间的数值是否存在差异。我可以通过筛选对单个物种执行此操作,但还需要对其余所有列以及更多格式类似的数据框执行相同的操作。之后还要以同样的方式进行事后(post-hoc)检验,找出每个物种在哪些设计之间存在差异。
我猜可以用 `map()` 或 `lapply()` 之类的函数来做到这一点。我最初的想法是为每一列建立一个模型,例如 `model<- lm(BLG ~ design, data=df)`,并在 `map` 函数中用这种形式对其他每一列重复此操作,然后对方差分析检验采用类似的方法,但我卡住了(基本上是在一开始)。
这是我目前所在的位置:
# Attempt: take every column name of df, build the formula string
# "design ~ <column>" for each one, and fit lm() on each formula.
# Note that design sits on the left-hand (response) side here, and
# names(df) also yields Group.1 and design itself.
test<- df %>%
names() %>%
paste('design ~', .) %>%
map(~lm(as.formula(.x), data=df))
导致以下错误:
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
NA/NaN/Inf in 'y'
In addition: Warning message:
In storage.mode(v) <- "double" : NAs introduced by coercion
我猜这个问题与此链接中的示例 2 类似,但如果问题不在 “design” 列,我不确定是哪里出了错。
任何人想到的可能有用的帮助或资源都将不胜感激。
感谢阅读。
`design` 是 character 类。公式是否应该反过来写?根据 `?lm` 文档:
Models for lm are specified symbolically. A typical model has the form response ~ terms where response is the (numeric) response vector and terms is a series of terms which specifies a linear predictor for response
# Fit one lm() per column in positions 2 to 7 of df (BLC, BLG, LMB, RSF,
# GSF, CCF); reformulate() builds the formula <column> ~ design, so each
# column is the response and design is the only term.
purrr::map(names(df)[2:7], ~ lm(reformulate('design', response = .x), data = df))
-输出
[[1]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
3.841 3.507 4.575
[[2]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
22.66 24.14 14.49
[[3]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
25.42 10.66 14.72
[[4]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
7.106 2.263 6.218
[[5]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
0.4703 2.3051 0.8267
[[6]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
0.18868 0.12579 0.05977
我有以下数据框:
# Example data frame with 30 rows, written as a structure() literal.
# Group.1 is an integer id running 1-10, repeated three times; BLC, BLG,
# LMB, RSF, GSF and CCF are numeric columns; design is a character column
# holding "random" (rows 1-10), "strat" (rows 11-20) and "hybrid" (rows 21-30).
df<- structure(list(Group.1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L), BLC = c(10.9890294366989, 7.31930466605672,
13.6185172644819, 2.7266530334015, 3.53565114908662, 7.20597804412166,
2.78164116828929, 7.59371098030222, 14.7343839844163, 2.9806259314456,
5.07619453154234, 6.88503820786366, 12.2882487654356, 1.40646976090014,
19.307342679726, 15.0249870253821, 4.34581364475618, 5.03491248395278,
7.94957003082448, 6.84343434343434, 11.2622383086214, 11.1839711729262,
4.7669094503789, 3.09762397594833, 8.10311438552811, 0, 0, 0,
0, 0), BLG = c(53.2196490874651, 23.9543988057977, 46.2826752583327,
34.9096813849679, 27.0376341826749, 49.2472186166963, 93.0631982759938,
46.1366527251764, 57.6460095990237, 36.5835422650789, 56.2627854701592,
30.1133129448127, 22.2997436361558, 28.9793549481134, 37.6201098690056,
59.8627031558285, 34.7171109184231, 48.9414623325316, 31.5061417556072,
21.2521546878513, 70.8263794749462, 24.930952093699, 39.307162693975,
28.9144148451338, 42.9157121339545, 3.94736842105263, 3.94736842105263,
3.94736842105263, 3.94736842105263, 3.94736842105263), LMB = c(75.2718187185061,
42.707035200077, 31.37371428004, 24.9294274297168, 21.619318105277,
19.8056309622509, 62.5665072062847, 30.2395840472535, 36.2246969501391,
16.053874321678, 73.325176836826, 32.1599744373439, 33.8619234899393,
39.1278597999347, 29.242123346214, 50.3372863653836, 23.3365756853847,
61.7018803213189, 18.2047745554517, 40.1231815267265, 36.2849916132823,
35.4393881210482, 41.6277079218274, 27.5840809362335, 14.5766262544513,
19.7368421052632, 19.7368421052632, 19.7368421052632, 19.7368421052632,
19.7368421052632), RSF = c(7.7061355134565, 6.57544180671257,
21.6485001821173, 14.3568910671585, 3.53565114908662, 2.89876366994815,
10.1680661480383, 17.3890884998598, 6.45810108311722, 2.95766439639045,
13.7591229373968, 21.1086837581149, 3.65965233302836, 26.151881306845,
9.17122695497959, 16.6545469585419, 8.26685329264933, 9.3745854643381,
1.4903129657228, 23.6018678125026, 7.04954072403232, 11.1546894959865,
20.5987856222152, 8.10190710702138, 4.41849566570698, 3.94736842105263,
3.94736842105263, 3.94736842105263, 3.94736842105263, 3.94736842105263
), GSF = c(0, 0, 0, 2.51341949455157, 8.34660636193077, 3.23362974939369,
2.85602204934611, 3.23362974939369, 3.63636363636364, 3.9344262295082,
0, 0, 3.9344262295082, 0, 1.46520146520147, 0, 0, 0, 3.9344262295082,
3.63636363636364, 1.46342316809685, 0.879120879120879, 0, 0,
2.36065573770492, 0, 0, 0, 0, 0), CCF = c(0, 0, 0, 3.14465408805032,
0, 0, 0, 0, 0, 0, 0, 0, 1.24223602484472, 0, 0, 0, 0, 0, 1.24223602484472,
0, 1.88679245283019, 0, 0, 0, 0, 0, 0, 0, 0, 0), design = c("random",
"random", "random", "random", "random", "random", "random", "random",
"random", "random", "strat", "strat", "strat", "strat", "strat",
"strat", "strat", "strat", "strat", "strat", "hybrid", "hybrid",
"hybrid", "hybrid", "hybrid", "hybrid", "hybrid", "hybrid", "hybrid",
"hybrid")), row.names = c(NA, -30L), class = "data.frame")
我需要以 design 变量为分组,对每一列分别运行方差分析。例如,我希望对物种 BLG 进行方差分析,检验各个设计之间的数值是否存在差异。我可以通过筛选对单个物种执行此操作,但还需要对其余所有列以及更多格式类似的数据框执行相同的操作。之后还要以同样的方式进行事后(post-hoc)检验,找出每个物种在哪些设计之间存在差异。
我猜可以用 `map()` 或 `lapply()` 之类的函数来做到这一点。我最初的想法是为每一列建立一个模型,例如 `model<- lm(BLG ~ design, data=df)`,并在 `map` 函数中用这种形式对其他每一列重复此操作,然后对方差分析检验采用类似的方法,但我卡住了(基本上是在一开始)。
这是我目前所在的位置:
# Attempt: take every column name of df, build the formula string
# "design ~ <column>" for each one, and fit lm() on each formula.
# Note that design sits on the left-hand (response) side here, and
# names(df) also yields Group.1 and design itself.
test<- df %>%
names() %>%
paste('design ~', .) %>%
map(~lm(as.formula(.x), data=df))
导致以下错误:
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
NA/NaN/Inf in 'y'
In addition: Warning message:
In storage.mode(v) <- "double" : NAs introduced by coercion
我猜这个问题与此链接中的示例 2 类似,但如果问题不在 “design” 列,我不确定是哪里出了错。
任何人想到的可能有用的帮助或资源都将不胜感激。
感谢阅读。
`design` 是 character 类。公式是否应该反过来写?根据 `?lm` 文档:
Models for lm are specified symbolically. A typical model has the form response ~ terms where response is the (numeric) response vector and terms is a series of terms which specifies a linear predictor for response
# Fit one lm() per column in positions 2 to 7 of df (BLC, BLG, LMB, RSF,
# GSF, CCF); reformulate() builds the formula <column> ~ design, so each
# column is the response and design is the only term.
purrr::map(names(df)[2:7], ~ lm(reformulate('design', response = .x), data = df))
-输出
[[1]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
3.841 3.507 4.575
[[2]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
22.66 24.14 14.49
[[3]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
25.42 10.66 14.72
[[4]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
7.106 2.263 6.218
[[5]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
0.4703 2.3051 0.8267
[[6]]
Call:
lm(formula = reformulate("design", response = .x), data = df)
Coefficients:
(Intercept) designrandom designstrat
0.18868 0.12579 0.05977