从 for 循环输出创建多个箱线图
Creating multiple boxplots from for loop output
在这个数据框中,有一个样本站点列表,以及每个站点内捕获的每个物种的种群参数估计值。我正在尝试运行一个 for 循环:该循环随机选择 5 个样本站点并重复 100 次,把每次抽到的站点存储到一个对象中,然后为每个物种绘制箱线图,箱线图中包含该物种在全部 100 次迭代中的所有值。
所以在最后的输出中会有一个物种 1、2、3、4、5 的箱线图 - 构成每个箱线图的值来自所有 100 个抽取的样本站点。
数据框:
# Example data (dput() output): one row per sample site (sample_site 1-26)
# and one numeric column per species (species_1 ... species_5). NA appears
# where no value is given for that site/species combination.
# NOTE(review): the object is classed as a grouped_df, but its `groups`
# attribute names `waterbody` and `transect_number`, which are not among the
# columns listed here - the dput() output looks hand-edited. Consider dropping
# the grouping (e.g. dplyr::ungroup()) before using the data - TODO confirm.
homer_cpue_wide<- structure(list(sample_site = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
), species_1= c(75, 38.4204909284952, 77.634011090573, 82.1917808219178,
119.341563786008, 22.5422667501565, 155.275381552754, 81.1332904056665,
37.037037037037, 73.2824427480916, 71.608040201005, 208.806818181818,
116.504854368932, 119.775421085465, 104.408352668213, 117.391304347826,
12.0603015075377, 93.5593220338983, 166.795366795367, 20, 91.566265060241,
70.8860759493671, NA, 44.8765893792072, NA, 3.96563119629874),
species_2= c(NA, 6.4034151547492, 11.090573012939, 16.4383561643836,
4.11522633744856, NA, NA, NA, NA, 4.58015267175573, NA, 21.3068181818182,
NA, NA, 6.96055684454756, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), species_3= c(51.3157894736842, 83.2443970117396,
73.9371534195933, 71.2328767123288, 28.8065843621399, 37.5704445835942,
59.721300597213, 38.6349001931745, 66.6666666666667, 77.8625954198473,
71.608040201005, 63.9204545454545, 46.6019417475728, 22.4578914535246,
6.96055684454756, 13.0434782608696, 24.1206030150754, 40.6779661016949,
60.2316602316602, 56, 28.9156626506024, 55.6962025316456,
20.2360876897133, 31.413612565445, NA, 31.7250495703899),
species_4= c(NA, NA, 14.7874306839187, NA, 4.11522633744856, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.63320463320463,
NA, NA, NA, NA, NA, NA, NA), species_5= c(7.89473684210526, 12.8068303094984,
7.39371534195933, 10.958904109589, NA, NA, 3.9814200398142,
NA, 3.7037037037037, 13.7404580152672, 11.3065326633166,
12.7840909090909, 3.88349514563107, 3.74298190892077, NA,
NA, NA, 4.06779661016949, NA, 16, 9.63855421686747, 5.06329113924051,
20.2360876897133, 22.4382946896036, NA, 7.93126239259749)), row.names = c(NA,
-26L), groups = structure(list(waterbody = c("Homer", "Homer",
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer",
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer",
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer",
"Homer", "Homer", "Homer"), transect_number = c(1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26), .rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -26L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
当前代码:
# Asker's attempt: draw 5 random sample sites 100 times, store every draw in
# list_new, and plot inside the loop.
list_new<- list()
# NOTE(review): counter starts at 1 and is incremented before print(), so the
# printed values run from 2 to 101 rather than 1 to 100.
counter<- 1
for (i in 1:100) {
# Pick 5 distinct row indices (replace=F) and keep those rows.
tempsample<- homer_cpue_wide[sample(1:nrow(homer_cpue_wide), 5, replace=F), ]
#store results of sampling into a list
list_new[[length(list_new)+1]]= tempsample
# create a box plot for each individual species CPUE estimates
# NOTE(review): boxplot() is called on the current draw (tempsample) only, so
# each plot shows 5 sites, not the values accumulated over all iterations.
# Columns 2:5 are species_1..species_4, so species_5 (column 6) is left out.
# `x` is never created in this snippet. theme()/element_text() look like
# ggplot2 functions while boxplot() is base graphics, so the `+` presumably
# cannot apply the theme here - confirm.
x[i]<- boxplot(as.list(tempsample[,2:5]))+
theme(axis.text.x = element_text(angle=45,hjust=1))
counter<- counter+1
print(counter)
}
运行后我确实得到了一个包含正确信息的箱线图(主题设置除外),但它只包含单次抽样得到的 5 个样本站点(即“tempsample”)的值,而不是全部 100 次抽样的值。此外,箱线图中的 [2:5] 条件是为了排除其他变量(例如样本站点列)——这样做可行,但我想应该有更好的方法。
如有任何帮助,我们将不胜感激。提前致谢
我不确定这是否是您想要的,但这是我的看法。
# Draw `sites_per_draw` sample sites at random (without replacement) `n_draws`
# times and keep every draw, so that all of them can be combined after the
# loop. Nothing is plotted here.
n_draws <- 100
sites_per_draw <- 5

# Preallocate one slot per draw instead of growing the list on each iteration.
list_new <- vector("list", n_draws)
counter <- 0
for (i in seq_len(n_draws)) {
  # seq_len() stays correct for a zero-row data frame (1:0 would give c(1, 0));
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  picked_rows <- sample(
    seq_len(nrow(homer_cpue_wide)),
    sites_per_draw,
    replace = FALSE
  )
  tempsample <- homer_cpue_wide[picked_rows, ]
  # Store the result of this draw.
  list_new[[i]] <- tempsample
  # Progress output: prints 1, 2, ..., n_draws.
  counter <- counter + 1
  print(counter)
}
您只需要让 for 循环随机选取 5 个样本站点并重复 100 次。在循环之外,您可以把
100 次迭代的结果按行合并(例如用 rbind),然后再绘图(我使用了 tidyverse 和 ggplot2)
# Combine all draws and plot one boxplot per species.
# Packages are loaded before first use; tidyverse attaches dplyr, tidyr,
# tibble and ggplot2, so a separate library(ggplot2) call is not needed.
library(tidyverse)

# Stack the draws into a single tibble. as_tibble() drops any grouping
# metadata carried over from homer_cpue_wide, which is not needed here.
list_new <- bind_rows(lapply(list_new, as_tibble))

# Reshape to long format: one row per site/species value. The species columns
# are selected by name instead of by position (2:6), so the code keeps working
# if columns are added or reordered.
list_new <- pivot_longer(
  list_new,
  cols = starts_with("species_"),
  names_to = "species",
  values_to = "value"
)

# One box per species, built from the values of all draws; the jittered points
# show the individual values on top of each box. na.rm = TRUE removes missing
# values silently instead of with a warning.
ggplot(list_new, aes(x = species, y = value)) +
  geom_boxplot(na.rm = TRUE) +
  geom_jitter(alpha = 0.5, width = 0.2, na.rm = TRUE)
这是我得到的结果:
在这个数据框中,有一个样本站点列表,以及每个站点内捕获的每个物种的种群参数估计值。我正在尝试运行一个 for 循环:该循环随机选择 5 个样本站点并重复 100 次,把每次抽到的站点存储到一个对象中,然后为每个物种绘制箱线图,箱线图中包含该物种在全部 100 次迭代中的所有值。
所以在最后的输出中会有一个物种 1、2、3、4、5 的箱线图 - 构成每个箱线图的值来自所有 100 个抽取的样本站点。
数据框:
# Example data (dput() output): one row per sample site (sample_site 1-26)
# and one numeric column per species (species_1 ... species_5). NA appears
# where no value is given for that site/species combination.
# NOTE(review): the object is classed as a grouped_df, but its `groups`
# attribute names `waterbody` and `transect_number`, which are not among the
# columns listed here - the dput() output looks hand-edited. Consider dropping
# the grouping (e.g. dplyr::ungroup()) before using the data - TODO confirm.
homer_cpue_wide<- structure(list(sample_site = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
), species_1= c(75, 38.4204909284952, 77.634011090573, 82.1917808219178,
119.341563786008, 22.5422667501565, 155.275381552754, 81.1332904056665,
37.037037037037, 73.2824427480916, 71.608040201005, 208.806818181818,
116.504854368932, 119.775421085465, 104.408352668213, 117.391304347826,
12.0603015075377, 93.5593220338983, 166.795366795367, 20, 91.566265060241,
70.8860759493671, NA, 44.8765893792072, NA, 3.96563119629874),
species_2= c(NA, 6.4034151547492, 11.090573012939, 16.4383561643836,
4.11522633744856, NA, NA, NA, NA, 4.58015267175573, NA, 21.3068181818182,
NA, NA, 6.96055684454756, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), species_3= c(51.3157894736842, 83.2443970117396,
73.9371534195933, 71.2328767123288, 28.8065843621399, 37.5704445835942,
59.721300597213, 38.6349001931745, 66.6666666666667, 77.8625954198473,
71.608040201005, 63.9204545454545, 46.6019417475728, 22.4578914535246,
6.96055684454756, 13.0434782608696, 24.1206030150754, 40.6779661016949,
60.2316602316602, 56, 28.9156626506024, 55.6962025316456,
20.2360876897133, 31.413612565445, NA, 31.7250495703899),
species_4= c(NA, NA, 14.7874306839187, NA, 4.11522633744856, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.63320463320463,
NA, NA, NA, NA, NA, NA, NA), species_5= c(7.89473684210526, 12.8068303094984,
7.39371534195933, 10.958904109589, NA, NA, 3.9814200398142,
NA, 3.7037037037037, 13.7404580152672, 11.3065326633166,
12.7840909090909, 3.88349514563107, 3.74298190892077, NA,
NA, NA, 4.06779661016949, NA, 16, 9.63855421686747, 5.06329113924051,
20.2360876897133, 22.4382946896036, NA, 7.93126239259749)), row.names = c(NA,
-26L), groups = structure(list(waterbody = c("Homer", "Homer",
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer",
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer",
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer",
"Homer", "Homer", "Homer"), transect_number = c(1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26), .rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -26L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
当前代码:
# Asker's attempt: draw 5 random sample sites 100 times, store every draw in
# list_new, and plot inside the loop.
list_new<- list()
# NOTE(review): counter starts at 1 and is incremented before print(), so the
# printed values run from 2 to 101 rather than 1 to 100.
counter<- 1
for (i in 1:100) {
# Pick 5 distinct row indices (replace=F) and keep those rows.
tempsample<- homer_cpue_wide[sample(1:nrow(homer_cpue_wide), 5, replace=F), ]
#store results of sampling into a list
list_new[[length(list_new)+1]]= tempsample
# create a box plot for each individual species CPUE estimates
# NOTE(review): boxplot() is called on the current draw (tempsample) only, so
# each plot shows 5 sites, not the values accumulated over all iterations.
# Columns 2:5 are species_1..species_4, so species_5 (column 6) is left out.
# `x` is never created in this snippet. theme()/element_text() look like
# ggplot2 functions while boxplot() is base graphics, so the `+` presumably
# cannot apply the theme here - confirm.
x[i]<- boxplot(as.list(tempsample[,2:5]))+
theme(axis.text.x = element_text(angle=45,hjust=1))
counter<- counter+1
print(counter)
}
运行后我确实得到了一个包含正确信息的箱线图(主题设置除外),但它只包含单次抽样得到的 5 个样本站点(即“tempsample”)的值,而不是全部 100 次抽样的值。此外,箱线图中的 [2:5] 条件是为了排除其他变量(例如样本站点列)——这样做可行,但我想应该有更好的方法。
如有任何帮助,我们将不胜感激。提前致谢
我不确定这是否是您想要的,但这是我的看法。
# Draw `sites_per_draw` sample sites at random (without replacement) `n_draws`
# times and keep every draw, so that all of them can be combined after the
# loop. Nothing is plotted here.
n_draws <- 100
sites_per_draw <- 5

# Preallocate one slot per draw instead of growing the list on each iteration.
list_new <- vector("list", n_draws)
counter <- 0
for (i in seq_len(n_draws)) {
  # seq_len() stays correct for a zero-row data frame (1:0 would give c(1, 0));
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  picked_rows <- sample(
    seq_len(nrow(homer_cpue_wide)),
    sites_per_draw,
    replace = FALSE
  )
  tempsample <- homer_cpue_wide[picked_rows, ]
  # Store the result of this draw.
  list_new[[i]] <- tempsample
  # Progress output: prints 1, 2, ..., n_draws.
  counter <- counter + 1
  print(counter)
}
您只需要让 for 循环随机选取 5 个样本站点并重复 100 次。在循环之外,您可以把
100 次迭代的结果按行合并(例如用 rbind),然后再绘图(我使用了 tidyverse 和 ggplot2)
# Combine all draws and plot one boxplot per species.
# Packages are loaded before first use; tidyverse attaches dplyr, tidyr,
# tibble and ggplot2, so a separate library(ggplot2) call is not needed.
library(tidyverse)

# Stack the draws into a single tibble. as_tibble() drops any grouping
# metadata carried over from homer_cpue_wide, which is not needed here.
list_new <- bind_rows(lapply(list_new, as_tibble))

# Reshape to long format: one row per site/species value. The species columns
# are selected by name instead of by position (2:6), so the code keeps working
# if columns are added or reordered.
list_new <- pivot_longer(
  list_new,
  cols = starts_with("species_"),
  names_to = "species",
  values_to = "value"
)

# One box per species, built from the values of all draws; the jittered points
# show the individual values on top of each box. na.rm = TRUE removes missing
# values silently instead of with a warning.
ggplot(list_new, aes(x = species, y = value)) +
  geom_boxplot(na.rm = TRUE) +
  geom_jitter(alpha = 0.5, width = 0.2, na.rm = TRUE)
这是我得到的结果: