我如何使用模型和 运行 F-test ggplot
How can I ggplot a tibble with models and run F-test
我有一个小问题,我已经为不同的数据子集拟合了一个模型。
我现在想在进行 F 检验之前将每个模型绘制在包含所有点的数据上,以查看包含“Site_class”变量是否对模型有任何好处。
数据:
sitedata <- structure(list(Site_class = c("1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All"), VarA = c(2.4,
6.5, 11.3, 16.1, 20.5, 24.3, 27.4, 30, 32, 33.6, 34.9, 36, 0.75,
2.45, 4.75, 7.45, 10.3, 13.05, 15.55, 17.7, 19.4, 20.75, 21.9,
22.8, 2.4, 6.5, 11.3, 16.1, 20.5, 24.3, 27.4, 30, 32, 33.6, 34.9,
36, 1.85, 5.15, 9.1, 13.2, 17.1, 20.55, 23.45, 25.9, 27.8, 29.3,
30.55, 31.6, 1.3, 3.8, 6.95, 10.35, 13.7, 16.8, 19.5, 21.8, 23.6,
25.05, 26.25, 27.2, 0.75, 2.45, 4.75, 7.45, 10.3, 13.05, 15.55,
17.7, 19.4, 20.75, 21.9, 22.8, 1.1, 2.6, 4.6, 6.9, 9.3, 11.6,
13.6, 15.2, 16.5, 17.55, 18.4), VarB = c(12, 81, 220, 403, 605,
806, 991, 1153, 1288, 1399, 1495, 1578, 1, 11, 45, 106, 189,
283, 381, 473, 552, 619, 675, 723, 12, 81, 220, 403, 605, 806,
991, 1153, 1288, 1399, 1495, 1578, 4, 51, 148, 286, 446, 609,
760, 893, 1008, 1105, 1186, 1255, 2, 27, 93, 190, 307, 433, 557,
670, 766, 848, 917, 975, 1, 11, 45, 106, 189, 283, 381, 473,
552, 619, 675, 723, 2, 11, 42, 94, 161, 234, 304, 368, 423, 469,
508)), row.names = c(NA, -83L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), groups = structure(list(Site_class = c("1",
"4", "All"), .rows = list(1:12, 13:24, 25:83)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"), .drop = FALSE))
这就是我到目前为止的工作方式。
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
#Create model
modfit <- function(df){
nls(VarB ~ a * (VarA^b), data=df, start=c(a=1,b=1))
}
#Nest the original data.frame
sitedata <- sitedata %>% group_by(Site_class) %>% nest()
#Fit the models
sitedata_model <- sitedata %>% mutate(
Model= map(.x=data, .f= modfit)
)
#Attempt to plot the models:
ggplot(sitedata_model[[2]][[3]], aes(x=VarA,y=VarB)) + #All the data
geom_point()+
geom_function(fun=~sitedata_model[[3]][[1]]) + # I assume I will have to plot them separately?
geom_function(fun=~sitedata_model[[3]][[2]]) +
geom_function(fun=~sitedata_model[[3]][[3]])
看来我已经成功创建了模型,但是它不理解我对它们进行绘图的调用。我也尝试过使用 predict
,但没有成功。
我怎样才能:
- 将它们绘制在我的图表上 &
- 运行 对所有 3 个模型进行 F 检验,看看是否存在差异?
geom_function
需要一个函数。不是公式,不是模型,而是函数.
我对这个 geom 没有太多经验,而且我不确定以下是否有效,但它似乎确实适用于您的用例:
sitedata_model <- sitedata %>%
#Fit the models
mutate(Model = purrr::map(.x=data, .f= modfit)) %>%
# extract formula from each model, convert to one-sided form, &
# replace coefficients with fitted values, & store in dataframe
# as character string
rowwise() %>%
mutate(func = formula(Model) %>%
as.character() %>%
magrittr::extract(3) %>%
gsub("VarA", ".x", ., fixed = T) %>%
gsub("a", Model$m$getPars()[1], .) %>%
gsub("b", Model$m$getPars()[2], .) %>%
paste("~", ., collapse = "")) %>%
ungroup()
# plot
ggplot(data = sitedata_model$data[[3]]) +
geom_point(aes(x = VarA, y = VarB)) +
# add formula in each row as a separate geom_function layer
lapply(seq(1, nrow(sitedata_model)),
function(i) geom_function(fun = rlang::as_function(formula(sitedata_model$func[i])),
aes(colour = sitedata_model$Site_class[i]))) +
# change legend name (can also change palette / labels / etc.)
scale_colour_discrete(name = "Site class")
至于运行 F-test,你指的是ANOVA吗?
> anova(sitedata_model$Model[[1]], sitedata_model$Model[[2]], sitedata_model$Model[[3]])
Analysis of Variance Table
Model 1: VarB ~ a * (VarA^b)
Model 2: VarB ~ a * (VarA^b)
Model 3: VarB ~ a * (VarA^b)
Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F)
1 10 60.14
2 10 90.36 0 0.00
3 57 741.38 -47 -651.02 1.5328 0.2386
有关如何解释 ANOVA 输出的说明,请参阅 here。
我有一个小问题,我已经为不同的数据子集拟合了一个模型。 我现在想在进行 F 检验之前将每个模型绘制在包含所有点的数据上,以查看包含“Site_class”变量是否对模型有任何好处。
数据:
sitedata <- structure(list(Site_class = c("1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All", "All",
"All", "All", "All", "All", "All", "All", "All", "All"), VarA = c(2.4,
6.5, 11.3, 16.1, 20.5, 24.3, 27.4, 30, 32, 33.6, 34.9, 36, 0.75,
2.45, 4.75, 7.45, 10.3, 13.05, 15.55, 17.7, 19.4, 20.75, 21.9,
22.8, 2.4, 6.5, 11.3, 16.1, 20.5, 24.3, 27.4, 30, 32, 33.6, 34.9,
36, 1.85, 5.15, 9.1, 13.2, 17.1, 20.55, 23.45, 25.9, 27.8, 29.3,
30.55, 31.6, 1.3, 3.8, 6.95, 10.35, 13.7, 16.8, 19.5, 21.8, 23.6,
25.05, 26.25, 27.2, 0.75, 2.45, 4.75, 7.45, 10.3, 13.05, 15.55,
17.7, 19.4, 20.75, 21.9, 22.8, 1.1, 2.6, 4.6, 6.9, 9.3, 11.6,
13.6, 15.2, 16.5, 17.55, 18.4), VarB = c(12, 81, 220, 403, 605,
806, 991, 1153, 1288, 1399, 1495, 1578, 1, 11, 45, 106, 189,
283, 381, 473, 552, 619, 675, 723, 12, 81, 220, 403, 605, 806,
991, 1153, 1288, 1399, 1495, 1578, 4, 51, 148, 286, 446, 609,
760, 893, 1008, 1105, 1186, 1255, 2, 27, 93, 190, 307, 433, 557,
670, 766, 848, 917, 975, 1, 11, 45, 106, 189, 283, 381, 473,
552, 619, 675, 723, 2, 11, 42, 94, 161, 234, 304, 368, 423, 469,
508)), row.names = c(NA, -83L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), groups = structure(list(Site_class = c("1",
"4", "All"), .rows = list(1:12, 13:24, 25:83)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"), .drop = FALSE))
这就是我到目前为止的工作方式。
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
#Create model
modfit <- function(df){
nls(VarB ~ a * (VarA^b), data=df, start=c(a=1,b=1))
}
#Nest the original data.frame
sitedata <- sitedata %>% group_by(Site_class) %>% nest()
#Fit the models
sitedata_model <- sitedata %>% mutate(
Model= map(.x=data, .f= modfit)
)
#Attempt to plot the models:
ggplot(sitedata_model[[2]][[3]], aes(x=VarA,y=VarB)) + #All the data
geom_point()+
geom_function(fun=~sitedata_model[[3]][[1]]) + # I assume I will have to plot them separately?
geom_function(fun=~sitedata_model[[3]][[2]]) +
geom_function(fun=~sitedata_model[[3]][[3]])
看来我已经成功创建了模型,但是它不理解我对它们进行绘图的调用。我也尝试过使用 predict
,但没有成功。
我怎样才能:
- 将它们绘制在我的图表上 &
- 运行 对所有 3 个模型进行 F 检验,看看是否存在差异?
geom_function
需要一个函数。不是公式,不是模型,而是函数.
我对这个 geom 没有太多经验,而且我不确定以下是否有效,但它似乎确实适用于您的用例:
sitedata_model <- sitedata %>%
#Fit the models
mutate(Model = purrr::map(.x=data, .f= modfit)) %>%
# extract formula from each model, convert to one-sided form, &
# replace coefficients with fitted values, & store in dataframe
# as character string
rowwise() %>%
mutate(func = formula(Model) %>%
as.character() %>%
magrittr::extract(3) %>%
gsub("VarA", ".x", ., fixed = T) %>%
gsub("a", Model$m$getPars()[1], .) %>%
gsub("b", Model$m$getPars()[2], .) %>%
paste("~", ., collapse = "")) %>%
ungroup()
# plot
ggplot(data = sitedata_model$data[[3]]) +
geom_point(aes(x = VarA, y = VarB)) +
# add formula in each row as a separate geom_function layer
lapply(seq(1, nrow(sitedata_model)),
function(i) geom_function(fun = rlang::as_function(formula(sitedata_model$func[i])),
aes(colour = sitedata_model$Site_class[i]))) +
# change legend name (can also change palette / labels / etc.)
scale_colour_discrete(name = "Site class")
至于运行 F-test,你指的是ANOVA吗?
> anova(sitedata_model$Model[[1]], sitedata_model$Model[[2]], sitedata_model$Model[[3]])
Analysis of Variance Table
Model 1: VarB ~ a * (VarA^b)
Model 2: VarB ~ a * (VarA^b)
Model 3: VarB ~ a * (VarA^b)
Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F)
1 10 60.14
2 10 90.36 0 0.00
3 57 741.38 -47 -651.02 1.5328 0.2386
有关如何解释 ANOVA 输出的说明,请参阅 here。