如何根据 R 包 Growthrates 中的非线性回归在 ggplot 中重现绘图?
How do I reproduce a plot in ggplot based on nonlinear regressions from the R package Growthrates?
我正在使用 Growthrates 包为我的数据生成增长率曲线的参数估计值。我已经进行了回归并查看了生成的图,我对数据很满意,但我想在 ggplot2 中重现以下图。
图 1:Multiplot of a regression for each group:treatment combo
我想要每个 group:Treatment 组合的回归线的多图,但我在((即逻辑、gompertz、gompertz2 等)中对其执行了所有回归。到目前为止我有:
library(growthrates)
####Using logistic regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_logistics <- all_growthmodels(y_data ~
grow_logistic(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper,
log = "y")
pp <- coef(many_logistics)
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_logistics)
many_logistics_results <- results(many_logistics)
xyplot(mumax ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
curve_logistics <- predict(many_logistics) #Prediction for given data (data for curve)
est_logistics <- predict(many_logistics, newdata=data.frame(time=seq(0, 1, 0.1))) #Extrapolation/Interpolation from curve
####Using Gompertz regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_gompertz <- all_growthmodels(y_datay_data ~
grow_gompertz(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
pp <- coef(many_gompertz)
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_gompertz)
many_gompertz_results <- results(many_gompertz)
xyplot(mumax ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
curve_gompertz <- predict(many_gompertz) #Prediction for given data (data for curve)
est_gompertz <- predict(many_gompertz, newdata=data.frame(time=seq(0, 1, 0.1))) #Extrapolation/Interpolation from curve
#Prepare the data frames
curve_logistics2 <- curve_logistics %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "logistic")
curve_gompertz2 <- curve_gompertz %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "gompertz")
alldata2<- Alldata %>%
select("sample", "treatment","total_time_days", "y_data") %>%
rename(time = "total_time_days") %>%
rename(y = "y_data") %>%
mutate(regression = "none")
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2, alldata2)
#define the function to automatically generate plots#define the function to automatically generate plots
REGRESSION_LINE_PLOT <-function(x) {ggplot(data = x, aes(x=time, y=y, colour = regression, linetype = regression)) +
geom_point(size = 2.5, data = subset(x, regression %in% c("none"))) +
stat_smooth(data = subset(x, regression %in% c("gompertz", "logistic"))) +
theme_bw() +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.spacing = unit(0,"cm"),
axis.line=element_line(colour="black"),
# axis.title.x = element_text(size=14, colour = "black"),
axis.title.x = element_blank(),
# axis.title.y = element_text(size=14, colour = "black"),
axis.title.y = element_blank(),
# axis.text.y = element_text(size=14, colour = "black"),
# axis.text.x = element_text(size=14, colour = "black"),
strip.background = element_blank(),
strip.text = element_text(size = 12, colour="black", face = "bold"),
legend.text= element_text(size = 12, colour = "black"),
legend.title=element_blank(),
text = element_text(size=12, family="Arial")) +
# plot.margin=unit(c(0.1,0.1,0.1,0.1),"cm")) +
#scale_colour_manual(values = cbbPalette) + ### here I tell R to use my custom colour palette
#scale_x_continuous(limits = c(-1,14)) + # set time range from -1 to 70 since we started sampling on day -1
#scale_y_continuous(limits = c(-1,350), breaks = seq(0, 360, 90)) + # For comparison purposes, i want all my panels to have the same y axis scale
ylab("") +
xlab("")
}
comp_reg_nested<- comp_reg %>%
group_by(sample, treatment) %>%
nest() %>%
mutate(plots=map(.x=data, ~REGRESSION_LINE_PLOT(.x)))
fo_ad_line <- comp_reg_nested[[1,"plots"]]
但是,我认为回归线在 ggplot22 中没有正确表示。有更好的方法吗?
我根据包的内置数据创建了一个与您的数据结构或多或少相似的数据示例,并稍微简化了代码,省略了默认的绘图函数。我非常喜欢您使用 map_df
构建数据框的方法,谢谢。然后我添加了一个简单的 ggplot,当然可以根据您的需要进行扩展和调整。
library(growthrates)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
## use subset of built-in example data of the package
## and adapt it to the existing part of the script
data(bactgrowth)
Alldata <- bactgrowth[(bactgrowth$conc < 1) & bactgrowth$replicate == 1, ]
names(Alldata) <- c("sample", "replicate", "treatment", "total_time_days", "y_data")
Alldata$y_data <- Alldata$y_data * 1000
Alldata$treatment <- as.character(Alldata$treatment)
####Using logistic regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_logistics <- all_growthmodels(y_data ~
grow_logistic(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
many_logistics_results <- results(many_logistics)
curve_logistics <- predict(many_logistics)
####Using Gompertz regression to fit the data across mutliple groups
many_gompertz <- all_growthmodels(y_data ~
grow_gompertz(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
many_gompertz_results <- results(many_gompertz)
curve_gompertz <- predict(many_gompertz)
#Prepare the data frames
curve_logistics2 <- curve_logistics %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "logistic")
curve_gompertz2 <- curve_gompertz %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "gompertz")
alldata2<- Alldata %>%
rename(time = "total_time_days", y = "y_data")
## combine the two curves to a joint data frame
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2)
## plot it
ggplot(comp_reg, aes(time, y)) +
geom_point(data = alldata2) +
geom_line(aes(color = regression)) +
facet_grid(treatment ~ sample)
我正在使用 Growthrates 包为我的数据生成增长率曲线的参数估计值。我已经进行了回归并查看了生成的图,我对数据很满意,但我想在 ggplot2 中重现以下图。
图 1:Multiplot of a regression for each group:treatment combo
我想要每个 group:Treatment 组合的回归线的多图,但我在((即逻辑、gompertz、gompertz2 等)中对其执行了所有回归。到目前为止我有:
library(growthrates)
####Using logistic regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_logistics <- all_growthmodels(y_data ~
grow_logistic(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper,
log = "y")
pp <- coef(many_logistics)
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_logistics)
many_logistics_results <- results(many_logistics)
xyplot(mumax ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
curve_logistics <- predict(many_logistics) #Prediction for given data (data for curve)
est_logistics <- predict(many_logistics, newdata=data.frame(time=seq(0, 1, 0.1))) #Extrapolation/Interpolation from curve
####Using Gompertz regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_gompertz <- all_growthmodels(y_datay_data ~
grow_gompertz(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
pp <- coef(many_gompertz)
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_gompertz)
many_gompertz_results <- results(many_gompertz)
xyplot(mumax ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
curve_gompertz <- predict(many_gompertz) #Prediction for given data (data for curve)
est_gompertz <- predict(many_gompertz, newdata=data.frame(time=seq(0, 1, 0.1))) #Extrapolation/Interpolation from curve
#Prepare the data frames
curve_logistics2 <- curve_logistics %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "logistic")
curve_gompertz2 <- curve_gompertz %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "gompertz")
alldata2<- Alldata %>%
select("sample", "treatment","total_time_days", "y_data") %>%
rename(time = "total_time_days") %>%
rename(y = "y_data") %>%
mutate(regression = "none")
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2, alldata2)
#define the function to automatically generate plots#define the function to automatically generate plots
REGRESSION_LINE_PLOT <-function(x) {ggplot(data = x, aes(x=time, y=y, colour = regression, linetype = regression)) +
geom_point(size = 2.5, data = subset(x, regression %in% c("none"))) +
stat_smooth(data = subset(x, regression %in% c("gompertz", "logistic"))) +
theme_bw() +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.spacing = unit(0,"cm"),
axis.line=element_line(colour="black"),
# axis.title.x = element_text(size=14, colour = "black"),
axis.title.x = element_blank(),
# axis.title.y = element_text(size=14, colour = "black"),
axis.title.y = element_blank(),
# axis.text.y = element_text(size=14, colour = "black"),
# axis.text.x = element_text(size=14, colour = "black"),
strip.background = element_blank(),
strip.text = element_text(size = 12, colour="black", face = "bold"),
legend.text= element_text(size = 12, colour = "black"),
legend.title=element_blank(),
text = element_text(size=12, family="Arial")) +
# plot.margin=unit(c(0.1,0.1,0.1,0.1),"cm")) +
#scale_colour_manual(values = cbbPalette) + ### here I tell R to use my custom colour palette
#scale_x_continuous(limits = c(-1,14)) + # set time range from -1 to 70 since we started sampling on day -1
#scale_y_continuous(limits = c(-1,350), breaks = seq(0, 360, 90)) + # For comparison purposes, i want all my panels to have the same y axis scale
ylab("") +
xlab("")
}
comp_reg_nested<- comp_reg %>%
group_by(sample, treatment) %>%
nest() %>%
mutate(plots=map(.x=data, ~REGRESSION_LINE_PLOT(.x)))
fo_ad_line <- comp_reg_nested[[1,"plots"]]
但是,我认为回归线在 ggplot22 中没有正确表示。有更好的方法吗?
我根据包的内置数据创建了一个与您的数据结构或多或少相似的数据示例,并稍微简化了代码,省略了默认的绘图函数。我非常喜欢您使用 map_df
构建数据框的方法,谢谢。然后我添加了一个简单的 ggplot,当然可以根据您的需要进行扩展和调整。
library(growthrates)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
## use subset of built-in example data of the package
## and adapt it to the existing part of the script
data(bactgrowth)
Alldata <- bactgrowth[(bactgrowth$conc < 1) & bactgrowth$replicate == 1, ]
names(Alldata) <- c("sample", "replicate", "treatment", "total_time_days", "y_data")
Alldata$y_data <- Alldata$y_data * 1000
Alldata$treatment <- as.character(Alldata$treatment)
####Using logistic regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_logistics <- all_growthmodels(y_data ~
grow_logistic(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
many_logistics_results <- results(many_logistics)
curve_logistics <- predict(many_logistics)
####Using Gompertz regression to fit the data across mutliple groups
many_gompertz <- all_growthmodels(y_data ~
grow_gompertz(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
many_gompertz_results <- results(many_gompertz)
curve_gompertz <- predict(many_gompertz)
#Prepare the data frames
curve_logistics2 <- curve_logistics %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "logistic")
curve_gompertz2 <- curve_gompertz %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "gompertz")
alldata2<- Alldata %>%
rename(time = "total_time_days", y = "y_data")
## combine the two curves to a joint data frame
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2)
## plot it
ggplot(comp_reg, aes(time, y)) +
geom_point(data = alldata2) +
geom_line(aes(color = regression)) +
facet_grid(treatment ~ sample)