rbind() 是否仅迭代最后 3 个方差分析结果?
rbind() is iterating only the last 3 anova results?
我编写了一个循环,遍历给定 .csv 文件的各列,并对每一列运行方差分析(ANOVA)和事后检验。然后,我想把每次的结果合并到一个数据框中,并导出为 .csv 文件。但是,我无法让 rbind() 正确构建我的 data.frame。能帮帮我吗?脚本如下:
setwd("~/School/Lab/mice/sugar_study_2015/MG-RAST and Metagenassist/Trimmed/R. CSV")

# Load the data sheet, keep only the first nine rows, and treat missing
# values as zero.
x <- read.csv("T0_B_Class_Anova.csv")
x <- x[1:9, ]
x[is.na(x)] <- 0

# Response columns: second through second-to-last, the same range the loop
# has always used. The guard avoids the backwards sequence that `2:1` would
# produce for a sheet with fewer than three columns.
last_col <- ncol(x) - 1
test_cols <- if (last_col >= 2) 2:last_col else integer(0)

# One list slot per tested column. The pieces are bound together once after
# the loop, so every row is labelled with the column it came from, regardless
# of how many rows a given test contributes.
anova_parts <- vector("list", length(test_cols))
tukey_parts <- vector("list", length(test_cols))

# Row-bind a list of data frames; an empty list yields an empty data frame.
bind_parts <- function(parts) {
  if (length(parts) > 0) do.call(rbind, parts) else data.frame()
}

for (k in seq_along(test_cols)) {
  i <- test_cols[k]
  columns <- names(x)[i]

  # One-way ANOVA of this column with 'Group' as the grouping factor.
  # The model is fitted once and reused for the post-hoc test.
  fit <- aov(x[, i] ~ Group, data = x)
  anovaresult <- anova(fit)
  anova_parts[[k]] <- data.frame(
    rn = rownames(anovaresult),      # term name of each ANOVA row
    as.data.frame(anovaresult),
    Names = columns,                 # column of `x` that was tested
    check.names = FALSE,             # keep headers such as "Sum Sq", "Pr(>F)"
    row.names = NULL
  )

  # Tukey's HSD post-hoc test on the same fit; `$Group` is the matrix of
  # pairwise comparisons, whose row names identify each pair.
  posthocresult <- TukeyHSD(fit)
  tukey_table <- posthocresult$Group
  tukey_parts[[k]] <- data.frame(
    rn = rownames(tukey_table),
    as.data.frame(tukey_table),
    Names = columns,
    check.names = FALSE,             # keep the "p adj" header
    row.names = NULL
  )

  # Echo the per-column results to the console.
  print(columns)
  print(anovaresult)
  print(posthocresult)
}

### don't forget to change these names for each data set
finalanova_BFT0 <- bind_parts(anova_parts)
finalposthoc_BFT0 <- bind_parts(tukey_parts)

write.csv(finalanova_BFT0, file="testfinalanova_BCT0")
write.csv(finalposthoc_BFT0, file="finalposthoc_BCT0")
您可以在此处找到示例 .csv 文件。
假设您想要的输出是 2 个数据框,分别包含两种检验的汇总结果。您可以使用 `purrr` 包中的 `map` 函数和 `broom` 包中的 `tidy` 函数来完成此操作。我把您发布的 csv 保存为 `anova-question-data.csv`。如果您要使用 `setwd`,我建议您先确认数据已被正确读取。以下是我用来得到这两个数据框的代码:
# Attach the packages whose functions are called unqualified below
# (broom is only used through broom::tidy, so it is not attached).
library(readr)  # read_csv()
library(purrr)  # map()
library(dplyr)  # mutate() and the %>% pipe

# read in the data
df <- read_csv(file = "anova-question-data.csv")

# Names of every column except the first; each one is tested against
# `Group` below (presumably the first column is `Group` -- confirm).
loop_list <- colnames(df[, -1])

# Run one ANOVA per column and tidy each result into a data frame.
anova_list <- map(loop_list, function(x) {
  anova_results <- anova(aov(df[[x]] ~ df[["Group"]]))
  # tidy() turns the ANOVA table into a data frame; the extra column records
  # which column was tested. `bacteria` is an arbitrary name and can be
  # changed without affecting the rest of the code.
  broom::tidy(anova_results) %>%
    mutate(bacteria = x)
})

# Stack the per-column data frames into a single data frame.
anova_df <- do.call(rbind, anova_list)

# Same process, with `TukeyHSD` in place of `anova`.
posthoc_list <- map(loop_list, function(x) {
  posthoc_results <- TukeyHSD(aov(df[[x]] ~ df[["Group"]]))
  broom::tidy(posthoc_results) %>%
    mutate(bacteria = x)
})

posthoc_df <- do.call(rbind, posthoc_list)
这将为您提供以下两个输出(我只打印前 5 行):
> head(anova_df, 5)
term df sumsq meansq statistic p.value bacteria
1 df[["Group"]] 2 1.265562e-07 6.327809e-08 0.02650174 0.9739597 Acidobacteria
2 Residuals 6 1.432617e-05 2.387695e-06 NA NA Acidobacteria
3 df[["Group"]] 2 9.332880e-02 4.666440e-02 0.84001916 0.4768300 Actinobacteria
4 Residuals 6 3.333096e-01 5.555159e-02 NA NA Actinobacteria
5 df[["Group"]] 2 9.114521e-04 4.557261e-04 1.08994816 0.3946484 Alphaproteobacteria
> head(posthoc_df, 5)
term comparison estimate conf.low conf.high adj.p.value bacteria
1 df[["Group"]] HF-CO 2.234233e-04 -0.003647709 0.004094556 0.9829095 Acidobacteria
2 df[["Group"]] HFS-CO -4.903533e-05 -0.003920168 0.003822097 0.9991677 Acidobacteria
3 df[["Group"]] HFS-HF -2.724587e-04 -0.004143591 0.003598674 0.9747264 Acidobacteria
4 df[["Group"]] HF-CO 2.345822e-01 -0.355886402 0.825050849 0.4856694 Actinobacteria
5 df[["Group"]] HFS-CO 1.907267e-01 -0.399741917 0.781195333 0.6084817 Actinobacteria
我编写了一个循环,遍历给定 .csv 文件的各列,并对每一列运行方差分析(ANOVA)和事后检验。然后,我想把每次的结果合并到一个数据框中,并导出为 .csv 文件。但是,我无法让 rbind() 正确构建我的 data.frame。能帮帮我吗?脚本如下:
setwd("~/School/Lab/mice/sugar_study_2015/MG-RAST and Metagenassist/Trimmed/R. CSV")

# Load the data sheet, keep only the first nine rows, and treat missing
# values as zero.
x <- read.csv("T0_B_Class_Anova.csv")
x <- x[1:9, ]
x[is.na(x)] <- 0

# Response columns: second through second-to-last, the same range the loop
# has always used. The guard avoids the backwards sequence that `2:1` would
# produce for a sheet with fewer than three columns.
last_col <- ncol(x) - 1
test_cols <- if (last_col >= 2) 2:last_col else integer(0)

# One list slot per tested column. The pieces are bound together once after
# the loop, so every row is labelled with the column it came from, regardless
# of how many rows a given test contributes.
anova_parts <- vector("list", length(test_cols))
tukey_parts <- vector("list", length(test_cols))

# Row-bind a list of data frames; an empty list yields an empty data frame.
bind_parts <- function(parts) {
  if (length(parts) > 0) do.call(rbind, parts) else data.frame()
}

for (k in seq_along(test_cols)) {
  i <- test_cols[k]
  columns <- names(x)[i]

  # One-way ANOVA of this column with 'Group' as the grouping factor.
  # The model is fitted once and reused for the post-hoc test.
  fit <- aov(x[, i] ~ Group, data = x)
  anovaresult <- anova(fit)
  anova_parts[[k]] <- data.frame(
    rn = rownames(anovaresult),      # term name of each ANOVA row
    as.data.frame(anovaresult),
    Names = columns,                 # column of `x` that was tested
    check.names = FALSE,             # keep headers such as "Sum Sq", "Pr(>F)"
    row.names = NULL
  )

  # Tukey's HSD post-hoc test on the same fit; `$Group` is the matrix of
  # pairwise comparisons, whose row names identify each pair.
  posthocresult <- TukeyHSD(fit)
  tukey_table <- posthocresult$Group
  tukey_parts[[k]] <- data.frame(
    rn = rownames(tukey_table),
    as.data.frame(tukey_table),
    Names = columns,
    check.names = FALSE,             # keep the "p adj" header
    row.names = NULL
  )

  # Echo the per-column results to the console.
  print(columns)
  print(anovaresult)
  print(posthocresult)
}

### don't forget to change these names for each data set
finalanova_BFT0 <- bind_parts(anova_parts)
finalposthoc_BFT0 <- bind_parts(tukey_parts)

write.csv(finalanova_BFT0, file="testfinalanova_BCT0")
write.csv(finalposthoc_BFT0, file="finalposthoc_BCT0")
您可以在此处找到示例 .csv 文件。
假设您想要的输出是 2 个数据框,分别包含两种检验的汇总结果。您可以使用 `purrr` 包中的 `map` 函数和 `broom` 包中的 `tidy` 函数来完成此操作。我把您发布的 csv 保存为 `anova-question-data.csv`。如果您要使用 `setwd`,我建议您先确认数据已被正确读取。以下是我用来得到这两个数据框的代码:
# Attach the packages whose functions are called unqualified below
# (broom is only used through broom::tidy, so it is not attached).
library(readr)  # read_csv()
library(purrr)  # map()
library(dplyr)  # mutate() and the %>% pipe

# read in the data
df <- read_csv(file = "anova-question-data.csv")

# Names of every column except the first; each one is tested against
# `Group` below (presumably the first column is `Group` -- confirm).
loop_list <- colnames(df[, -1])

# Run one ANOVA per column and tidy each result into a data frame.
anova_list <- map(loop_list, function(x) {
  anova_results <- anova(aov(df[[x]] ~ df[["Group"]]))
  # tidy() turns the ANOVA table into a data frame; the extra column records
  # which column was tested. `bacteria` is an arbitrary name and can be
  # changed without affecting the rest of the code.
  broom::tidy(anova_results) %>%
    mutate(bacteria = x)
})

# Stack the per-column data frames into a single data frame.
anova_df <- do.call(rbind, anova_list)

# Same process, with `TukeyHSD` in place of `anova`.
posthoc_list <- map(loop_list, function(x) {
  posthoc_results <- TukeyHSD(aov(df[[x]] ~ df[["Group"]]))
  broom::tidy(posthoc_results) %>%
    mutate(bacteria = x)
})

posthoc_df <- do.call(rbind, posthoc_list)
这将为您提供以下两个输出(我只打印前 5 行):
> head(anova_df, 5)
term df sumsq meansq statistic p.value bacteria
1 df[["Group"]] 2 1.265562e-07 6.327809e-08 0.02650174 0.9739597 Acidobacteria
2 Residuals 6 1.432617e-05 2.387695e-06 NA NA Acidobacteria
3 df[["Group"]] 2 9.332880e-02 4.666440e-02 0.84001916 0.4768300 Actinobacteria
4 Residuals 6 3.333096e-01 5.555159e-02 NA NA Actinobacteria
5 df[["Group"]] 2 9.114521e-04 4.557261e-04 1.08994816 0.3946484 Alphaproteobacteria
> head(posthoc_df, 5)
term comparison estimate conf.low conf.high adj.p.value bacteria
1 df[["Group"]] HF-CO 2.234233e-04 -0.003647709 0.004094556 0.9829095 Acidobacteria
2 df[["Group"]] HFS-CO -4.903533e-05 -0.003920168 0.003822097 0.9991677 Acidobacteria
3 df[["Group"]] HFS-HF -2.724587e-04 -0.004143591 0.003598674 0.9747264 Acidobacteria
4 df[["Group"]] HF-CO 2.345822e-01 -0.355886402 0.825050849 0.4856694 Actinobacteria
5 df[["Group"]] HFS-CO 1.907267e-01 -0.399741917 0.781195333 0.6084817 Actinobacteria