如何从嵌套数据帧创建直方图并将它们作为对象存储在 r 的列表中?
How can I create histograms from nested dataframes and store them as objects in a list in r?
我是一名编码新手。我正在尝试为需要大量渔业数据的工作创建一个闪亮的应用程序,计算一些指标,然后在 rMarkdown 文件中吐出所有必需的图和指标。这些数据集充满了对多个不同湖泊中多种不同物种的大量观察。我们想为每个湖泊的每个物种创建地块。
为了获得所需的输出,我相信我需要嵌套数据帧,为每个 lake_species 组合创建 geom_histograms(下面示例中的 cyl_gear 组合),然后将它们作为对象存储在主数据框中的 list/column 中,以便我可以将对象传递到 rMarkdown 中进行打印。
这是我要问的一个例子:
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
nested <- mtcars %>%
mutate(uniqueID=paste(mtcars$cyl, sep = "_", mtcars$gear),
gear2=gear) %>%
group_by(uniqueID, gear) %>%
nest()
histyfun <- function(x){ ## I know this set of case_when code does not work, but this
## is my most recent attempt at it.
case_when(x$gear=="3" ~
ggplot(data=x$data, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.2, color="black",
position = position_stack(reverse=TRUE),
breaks=seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight",
breaks = seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")),
x$gear=="4" ~
ggplot(data=x$data, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.1, color="black",
position = position_stack(reverse=TRUE),
breaks=seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight",
breaks = seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")),
x$gear=="5" ~
ggplot(data=x$data, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.3, color="black",
position = position_stack(reverse=TRUE),
breaks=seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight",
breaks = seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")),
TRUE ~ 0
)
}
mutate(nested, histogram = nested %>% map(histyfun))
我知道上面的代码不起作用,但它应该能说明我正在尝试创建什么。
我正在努力解决如何:A) 通过在嵌套数据框中调用适当的列(此处示例中的 wt)来创建我的 geom_histograms,然后 B) 如何将这些直方图存储为对象新 column/list。我不知道我在做什么,感谢您能给我的任何 pointers/tips。谢谢!
tidyverse 包对于大多数数据操作来说非常有用,但它们并不是真正为实现功能而设计的。虽然这种方法公认不雅且 old-school,但我认为它会给你想要的东西。我修改了你的函数以在列表中调用。我没有使用 case_when()
函数来更改 tibble 或数据帧中的值,而是使用了 if()
和 else()
语句。此外,您的函数没有 return()
调用,所以我将其添加进来。看看它,希望它就是您所追求的。
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
nested <- mtcars %>%
mutate(uniqueID=paste(mtcars$cyl, sep = "_", mtcars$gear),
gear2=gear) %>%
group_by(uniqueID, gear) %>%
nest()
histyfun <- function(x){ ## I know this set of case_when code does not work, but this is my most
## recent attempt at it.
if(unique(x$gear2)==3){
Y<-ggplot(data=x, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.2, color="black", position = position_stack(reverse=TRUE),
breaks=seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
}else{
if(unique(x$gear2)==4){
Y<-ggplot(data=x, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.1, color="black", position = position_stack(reverse=TRUE),
breaks=seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
}
else{
if(unique(x$gear2)==5){
Y<-ggplot(data=x, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.3, color="black", position = position_stack(reverse=TRUE),
breaks=seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
}
}
}
return(Y)
}
nest_list<-as.list(nested$data)
tmp<-lapply(nest_list, as.data.frame)
par(mfrow=c(2,4))
lapply(tmp, histyfun)
tidyverse
方法可能看起来像这样。
- 使您的函数成为两个(或...)参数的函数,例如
gear
和数据集 x
- 您可以使用
purrr::pmap
(或 map2
)代替 purrr::map
来遍历嵌套数据集的 gear
和 data
列
- 您还可以大大简化您的函数。不要复制绘图代码,而是使用
if
或 switch
有条件地设置根据齿轮数量而变化的参数,例如如果你的代表是 binwidth
参数。
顺便说一句:在 group_by 之后 ungroup
总是一个好主意(尤其是嵌套)。
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
histyfun <- function(gear, x) { ## I know this set of case_when code does not work, but this
binwidth <- switch(as.character(gear), "3" = .2, "4" = 0.1, .3)
breaks_x <- seq(min(x$wt) - 0.2, max(x$wt) + 0.2, 0.2)
ggplot(data = x, aes(x = wt, fill = hp)) +
geom_histogram(
binwidth = binwidth, color = "black",
position = position_stack(reverse = TRUE)
) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(
name = "Weight",
breaks = breaks_x
) +
aes(y = stat(count) / sum(stat(count))) +
scale_y_continuous(
name = "Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)
) +
labs(fill = "") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")
)
}
nested <- mtcars %>%
mutate(
uniqueID = paste(mtcars$cyl, sep = "_", mtcars$gear),
gear2 = gear
) %>%
group_by(uniqueID, gear) %>%
nest() %>%
ungroup()
mutate(nested, histogram = pmap(list(gear = gear, x = data), histyfun))
#> # A tibble: 8 × 4
#> gear uniqueID data histogram
#> <dbl> <chr> <list> <list>
#> 1 4 6_4 <tibble [4 × 11]> <gg>
#> 2 4 4_4 <tibble [8 × 11]> <gg>
#> 3 3 6_3 <tibble [2 × 11]> <gg>
#> 4 3 8_3 <tibble [12 × 11]> <gg>
#> 5 3 4_3 <tibble [1 × 11]> <gg>
#> 6 5 4_5 <tibble [2 × 11]> <gg>
#> 7 5 8_5 <tibble [2 × 11]> <gg>
#> 8 5 6_5 <tibble [1 × 11]> <gg>
我是一名编码新手。我正在尝试为需要大量渔业数据的工作创建一个闪亮的应用程序,计算一些指标,然后在 rMarkdown 文件中吐出所有必需的图和指标。这些数据集充满了对多个不同湖泊中多种不同物种的大量观察。我们想为每个湖泊的每个物种创建地块。
为了获得所需的输出,我相信我需要嵌套数据帧,为每个 lake_species 组合创建 geom_histograms(下面示例中的 cyl_gear 组合),然后将它们作为对象存储在主数据框中的 list/column 中,以便我可以将对象传递到 rMarkdown 中进行打印。
这是我要问的一个例子:
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
nested <- mtcars %>%
mutate(uniqueID=paste(mtcars$cyl, sep = "_", mtcars$gear),
gear2=gear) %>%
group_by(uniqueID, gear) %>%
nest()
histyfun <- function(x){ ## I know this set of case_when code does not work, but this
## is my most recent attempt at it.
case_when(x$gear=="3" ~
ggplot(data=x$data, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.2, color="black",
position = position_stack(reverse=TRUE),
breaks=seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight",
breaks = seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")),
x$gear=="4" ~
ggplot(data=x$data, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.1, color="black",
position = position_stack(reverse=TRUE),
breaks=seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight",
breaks = seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")),
x$gear=="5" ~
ggplot(data=x$data, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.3, color="black",
position = position_stack(reverse=TRUE),
breaks=seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight",
breaks = seq(min(data$wt)-0.2, max(data$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")),
TRUE ~ 0
)
}
mutate(nested, histogram = nested %>% map(histyfun))
我知道上面的代码不起作用,但它应该能说明我正在尝试创建什么。
我正在努力解决如何:A) 通过在嵌套数据框中调用适当的列(此处示例中的 wt)来创建我的 geom_histograms,然后 B) 如何将这些直方图存储为对象新 column/list。我不知道我在做什么,感谢您能给我的任何 pointers/tips。谢谢!
tidyverse 包对于大多数数据操作来说非常有用,但它们并不是真正为实现功能而设计的。虽然这种方法公认不雅且 old-school,但我认为它会给你想要的东西。我修改了你的函数以在列表中调用。我没有使用 case_when()
函数来更改 tibble 或数据帧中的值,而是使用了 if()
和 else()
语句。此外,您的函数没有 return()
调用,所以我将其添加进来。看看它,希望它就是您所追求的。
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
nested <- mtcars %>%
mutate(uniqueID=paste(mtcars$cyl, sep = "_", mtcars$gear),
gear2=gear) %>%
group_by(uniqueID, gear) %>%
nest()
histyfun <- function(x){ ## I know this set of case_when code does not work, but this is my most
## recent attempt at it.
if(unique(x$gear2)==3){
Y<-ggplot(data=x, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.2, color="black", position = position_stack(reverse=TRUE),
breaks=seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
}else{
if(unique(x$gear2)==4){
Y<-ggplot(data=x, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.1, color="black", position = position_stack(reverse=TRUE),
breaks=seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
}
else{
if(unique(x$gear2)==5){
Y<-ggplot(data=x, aes(x=wt, fill=hp)) +
geom_histogram(binwidth = 0.3, color="black", position = position_stack(reverse=TRUE),
breaks=seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt)+0.2, 0.2)) +
aes(y=stat(count)/sum(stat(count))) +
scale_y_continuous(name="Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)) +
labs(fill="") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
}
}
}
return(Y)
}
nest_list<-as.list(nested$data)
tmp<-lapply(nest_list, as.data.frame)
par(mfrow=c(2,4))
lapply(tmp, histyfun)
tidyverse
方法可能看起来像这样。
- 使您的函数成为两个(或...)参数的函数,例如
gear
和数据集x
- 您可以使用
purrr::pmap
(或map2
)代替purrr::map
来遍历嵌套数据集的gear
和data
列 - 您还可以大大简化您的函数。不要复制绘图代码,而是使用
if
或switch
有条件地设置根据齿轮数量而变化的参数,例如如果你的代表是binwidth
参数。
顺便说一句:在 group_by 之后 ungroup
总是一个好主意(尤其是嵌套)。
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
histyfun <- function(gear, x) { ## I know this set of case_when code does not work, but this
binwidth <- switch(as.character(gear), "3" = .2, "4" = 0.1, .3)
breaks_x <- seq(min(x$wt) - 0.2, max(x$wt) + 0.2, 0.2)
ggplot(data = x, aes(x = wt, fill = hp)) +
geom_histogram(
binwidth = binwidth, color = "black",
position = position_stack(reverse = TRUE)
) +
scale_fill_continuous(type = "gradient") +
scale_x_continuous(
name = "Weight",
breaks = breaks_x
) +
aes(y = stat(count) / sum(stat(count))) +
scale_y_continuous(
name = "Percent Frequency", labels = scales::percent,
breaks = seq(0, 1, 0.02)
) +
labs(fill = "") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")
)
}
nested <- mtcars %>%
mutate(
uniqueID = paste(mtcars$cyl, sep = "_", mtcars$gear),
gear2 = gear
) %>%
group_by(uniqueID, gear) %>%
nest() %>%
ungroup()
mutate(nested, histogram = pmap(list(gear = gear, x = data), histyfun))
#> # A tibble: 8 × 4
#> gear uniqueID data histogram
#> <dbl> <chr> <list> <list>
#> 1 4 6_4 <tibble [4 × 11]> <gg>
#> 2 4 4_4 <tibble [8 × 11]> <gg>
#> 3 3 6_3 <tibble [2 × 11]> <gg>
#> 4 3 8_3 <tibble [12 × 11]> <gg>
#> 5 3 4_3 <tibble [1 × 11]> <gg>
#> 6 5 4_5 <tibble [2 × 11]> <gg>
#> 7 5 8_5 <tibble [2 × 11]> <gg>
#> 8 5 6_5 <tibble [1 × 11]> <gg>