How to use nls with caret to do cross-validation
I have fit multiple models to the same data using nls, and I am trying to figure out how to do K-fold cross-validation with caret (e.g., here). A general question was asked about using nls in caret for a single model; however, the answer referred them to this resource, which is beyond my understanding (i.e., how to adapt it to nls), especially for fitting several different nls functions.
Here are sample data and examples of model objects fit with nls:
library("caret")
df <-
structure(list(c = c(123.86, 208.75, 141.5, 230.73, 143.4, 209.31,
161.15, 130.87, 232.05, 121.61, 176.31, 139.01, 131.92, 156.61,
150.05, 121, 134.12, 146.83, 181.39, 115, 147.87, 161.49, 107.65,
115.51, 144.11), q = c(0.028, 0.004, 0.049, 0.001, 0.049, 0.004,
0.016, 0.015, 0.003, 0.026, 0.002, 0.009, 0.148, 0.012, 0.017,
0.086, 0.02, 0.038, 0.003, 0.031, 0.011, 0.032, 0.132, 0.093,
0.026)), row.names = c(NA, -25L), class = c("tbl_df", "tbl",
"data.frame"))
# Model 1
eq1 <- function(q,a,n) (a*q**(-1/n))
eq1_fit <- nls(c ~ eq1(q,a,n), data=df,start=list(a=380, n=5))
# Model 2
eq2 <- function(q,h,g,n,c0) (h*exp(-g*q**(1/n))+c0)
eq2_fit <- nls(c ~ eq2(q,h,g,n=5,c0=6), data=df,start=list(h=100,g=1))
There are several additional models, all fit to the same data with nls, and I would like to do 5-fold or 10-fold CV following the example shown here or something similar. A solution using tidymodels and workflowsets would also work. Thanks for your help!
You can do this with the vfold_cv() function from the rsample package, which is part of the tidymodels ecosystem.
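Before the full solution, here is a minimal sketch of the rsample mechanics the code below relies on (the mtcars data is just for illustration, not part of your problem): vfold_cv() returns a tibble with a splits column, and analysis()/assessment() extract the training and held-out rows of each split.
library(rsample)
## 5-fold CV on a built-in data set, just to show the mechanics
folds <- vfold_cv(mtcars, v = 5)
folds                ## a tibble with columns `splits` and `id`
s <- folds$splits[[1]]
nrow(analysis(s))    ## rows used for fitting (about 4/5 of the data)
nrow(assessment(s))  ## held-out rows used for evaluation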
df <-
structure(list(c = c(123.86, 208.75, 141.5, 230.73, 143.4, 209.31,
161.15, 130.87, 232.05, 121.61, 176.31, 139.01, 131.92, 156.61,
150.05, 121, 134.12, 146.83, 181.39, 115, 147.87, 161.49, 107.65,
115.51, 144.11), q = c(0.028, 0.004, 0.049, 0.001, 0.049, 0.004,
0.016, 0.015, 0.003, 0.026, 0.002, 0.009, 0.148, 0.012, 0.017,
0.086, 0.02, 0.038, 0.003, 0.031, 0.011, 0.032, 0.132, 0.093,
0.026)), row.names = c(NA, -25L), class = c("tbl_df", "tbl",
"data.frame"))
library(rsample)
library(tidyverse)
## define equations
eq1 <- function(q,a,n) (a*q**(-1/n))
eq2 <- function(q,h,g,n,c0) (h*exp(-g*q**(1/n))+c0)
eq1_fit <- nls(c ~ eq1(q,a,n), data=df,start=list(a=380, n=5))
eq2_fit <- nls(c ~ eq2(q,h,g,n=5,c0=6), data=df,start=list(h=100,g=1))
y <- df$c
## in-sample residuals and squared errors computed on the full data;
## these are for reference only: the cross-validated versions
## are computed inside cv_mods() below
r1 <- (y - predict(eq1_fit, newdata=df))
r2 <- (y - predict(eq2_fit, newdata=df))
e1 <- (y - predict(eq1_fit, newdata=df))^2
e2 <- (y - predict(eq2_fit, newdata=df))^2
## define function that will do the model fitting and assessment
cv_mods <- function(split, ...){
  ## fit models on the analysis partition
  eq1_fit <- nls(c ~ eq1(q,a,n), data = analysis(split), start = list(a=380, n=5))
  eq2_fit <- nls(c ~ eq2(q,h,g,n=5,c0=6), data = analysis(split), start = list(h=100, g=1))
  ## take the dependent variable from the assessment partition
  y <- assessment(split)$c
  ## compute the residuals using model predictions
  ## on the assessment partition
  e1 <- (y - predict(eq1_fit, newdata = assessment(split)))
  e2 <- (y - predict(eq2_fit, newdata = assessment(split)))
  ## return the cross-validated residuals from both models
  ## as a data frame
  data.frame(e1 = e1, e2 = e2)
}
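One caveat, and an assumption on my part rather than something your data necessarily needs: nls() can fail to converge on some resamples, especially with small folds. A hypothetical variant of cv_mods() that wraps each fit in tryCatch() and returns NA residuals for failed fits keeps the map() call below from erroring out:
## hypothetical variant: tolerate folds where nls() fails to converge
## (assumes eq1/eq2 and the rsample functions already defined above)
cv_mods_safe <- function(split, ...){
  fit1 <- tryCatch(
    nls(c ~ eq1(q,a,n), data = analysis(split), start = list(a=380, n=5)),
    error = function(e) NULL)
  fit2 <- tryCatch(
    nls(c ~ eq2(q,h,g,n=5,c0=6), data = analysis(split), start = list(h=100, g=1)),
    error = function(e) NULL)
  y <- assessment(split)$c
  ## NA residuals flag a failed fit; drop them downstream with na.rm = TRUE
  e1 <- if (is.null(fit1)) rep(NA_real_, length(y)) else y - predict(fit1, newdata = assessment(split))
  e2 <- if (is.null(fit2)) rep(NA_real_, length(y)) else y - predict(fit2, newdata = assessment(split))
  data.frame(e1 = e1, e2 = e2)
}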
## estimate the cross-validation
## the vfold_cv function sets up the cross-validation partitions
## I used 10 repeats here for speed, but in the "real world" you
## would probably want lots more
out <- vfold_cv(df, v = 10, repeats = 10) %>%
  ## estimate the cv on all of the partitions
  mutate(err = map(splits, cv_mods)) %>%
  ## unnest the error column to turn it into two
  ## columns in your data frame
  unnest(err)
## First analysis: Pr(mod1 better than mod2)
out1 <- out %>%
  ## group by repeat
  group_by(id) %>%
  ## calculate the standard deviation of the errors for each repeat
  summarise(across(e1:e2, ~sd(.x))) %>%
  ## calculate the probability across repeats that model 1 is better
  ## than model 2
  summarise(p_eq1_better = mean(e1 < e2))
out1
#> # A tibble: 1 × 1
#> p_eq1_better
#> <dbl>
#> 1 1
## alternatively, follow similar steps to above,
## but get the average cross-validation error
## for each model:
out2 <- out %>%
  group_by(id) %>%
  ## compute the error (sd of the cross-validated residuals)
  ## for each repeat
  summarise(across(e1:e2, ~sd(.x))) %>%
  ## calculate the average CV error across repeats:
  summarise(across(e1:e2, ~mean(.x)))
out2
#> # A tibble: 1 × 2
#> e1 e2
#> <dbl> <dbl>
#> 1 19.7 21.2
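A side note, again my assumption rather than part of the answer above: the sd of the residuals is only an RMSE-like measure (the two differ when the mean cross-validated residual is not zero). If you want a textbook RMSE, square, average, and take the root explicitly:
## hypothetical alternative: plain RMSE per repeat, then averaged across repeats
out_rmse <- out %>%
  group_by(id) %>%
  summarise(across(e1:e2, ~sqrt(mean(.x^2)))) %>%
  summarise(across(e1:e2, ~mean(.x)))
out_rmse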
Created on 2022-04-12 by the reprex package (v2.0.1)