如何计算同一横坐标值下坐标点与非线性回归的均方根误差和标准差?

How to calculate RMSE and standard deviation between a coordinate point and a non-linear regression under the same abscissa value?

原数据如下:

ISIDOR <- structure(list(Pos_heliaphen = c("W30", "X41", "Y27", "Z24", 
                                       "Y27", "W30", "W30", "X41", "Y27", "W30", "X41", "Z40", "Z99"
), traitement = c("WW", "WW", "WW", "WW", "WW", "WW", "WW", "WW", 
                  "WW", "WW", "WW", "WW", "WW"), Variete = c("Isidor", "Isidor", 
                                                             "Isidor", "Isidor", "Isidor", "Isidor", "Isidor", "Isidor", "Isidor", 
                                                             "Isidor", "Isidor", "Isidor", "Cali"), FTSW_apres_arros = c(0.462837958498518, 
                                                                                                                         0.400045032939416, 0.352560790392534, 0.377856799586057, 0.170933345859364, 
                                                                                                                         0.315689846065931, 0.116825600914318, 0.0332444780173884, 0.00966070114456602, 
                                                                                                                         0.0871102539376406, 0.0107280083093036, 0.195548432729584, 1), 
NLE = c(0.903498791068124, 0.954670066942938, 0.970762905436272, 
        0.873838605282389, 0.647875257025359, 0.53056603773585, 0.0384548155916796, 
        0.0470924009989314, 0.00403163281128882, 0.193696514297641, 
        0.0718450645564359, 0.295346695941639, 1)), class = c("tbl_df", 
                                                              "tbl", "data.frame"), row.names = c(NA, -13L))

代码如下:

pred_df <- data.frame(FTSW_apres_arros = seq(min(ISIDOR$FTSW_apres_arros), 
                                             max(ISIDOR$FTSW_apres_arros),
                                             length.out = 100))

pred_df$NLE <- predict(mod, newdata = pred_df)

mod = nls(NLE ~ 2/(1+exp(a*FTSW_apres_arros))-1,start = list(a=1),data = ISIDOR)
ISIDOR$pred = predict(mod,ISIDOR)
a = coef(mod)
RMSE = rmse(ISIDOR$NLE, ISIDOR$pred)
MSE = mse(ISIDOR$NLE, ISIDOR$pred)
Rsquared = summary(lm(ISIDOR$NLE~ ISIDOR$pred))$r.squared

ggplot(ISIDOR, aes(FTSW_apres_arros, NLE)) + 
  geom_point(aes(color = Variete), pch = 19, cex = 3) +
  geom_line(data = pred_df) +
  scale_color_manual(values = c("red3","blue3"))+
  scale_y_continuous(limits = c(0, 1.0)) +
  scale_x_continuous(limits = c(0, 1)) +
  labs(title = "Isidor", 
       y = "Expansion folliaire totale relative",
       x = "FTSW",
       subtitle = paste0("y = 2/(1 + exp(", round(a, 3), "* x)) -1)","\n",
                         "R^2 = ", round(Rsquared, 3),"  RMSE = ",
                         round(RMSE, 3), "   MSE = ", round(MSE, 3)))+
  theme(plot.title = element_text(hjust = 0, size = 14, face = "bold", 
                                  colour = "black"),
        plot.subtitle = element_text(hjust = 0,size=10, face = "italic", 
                                     colour = "black"),
        legend.position = "none")

然后我得到下图:

现在我要计算RMSE标准差之间橙色圈出的坐标点(0.352560790, 0.970762905 ) 和非线性回归 y = 2/(1 + exp(-5.674* x))-1(回归线中坐标点相同横坐标 0.352560790)。有人可以给我建议吗?

非常感谢!

您没有向我们展示您是如何制作模型的,所以我假设它是这样的:

mod <- nls(NLE ~ 2/(1 + exp(a * FTSW_apres_arros)) - 1, start = list(a = -6),
           data = ISIDOR)

我们可以使用我们的模型为原始数据框中的每一行插入 NLE 的预期值,如下所示:

ISIDOR$pred  <- predict(mod)

并且我们可以得到每个点的残差(或误差)(即它与回归线的距离,如下所示:

ISIDOR$error <- summary(mod)$residuals 

这会产生以下数据框:

head(ISIDOR)
#> # A tibble: 6 x 7
#>   Pos_heliaphen traitement Variete FTSW_apres_arros   NLE  pred   error
#>   <chr>         <chr>      <chr>              <dbl> <dbl> <dbl>   <dbl>
#> 1 W30           WW         Isidor             0.463 0.903 0.865  0.0385
#> 2 X41           WW         Isidor             0.400 0.955 0.813  0.142 
#> 3 Y27           WW         Isidor             0.353 0.971 0.762  0.209 
#> 4 Z24           WW         Isidor             0.378 0.874 0.790  0.0837
#> 5 Y27           WW         Isidor             0.171 0.648 0.450  0.198 
#> 6 W30           WW         Isidor             0.316 0.531 0.714 -0.184

对于我们的绘图,我们计算各种指标并使用 geom_smooth 绘制回归线(这意味着我们不需要预测数据框)

library(geomtextpath)

Rsquared <- 1 - var(ISIDOR$error) / var(ISIDOR$NLE)
MSE      <- mean(ISIDOR$error^2)
RMSE     <- sqrt(MSE)

ggplot(ISIDOR, aes(FTSW_apres_arros, NLE)) + 
  geom_segment(aes(xend = FTSW_apres_arros, yend = pred), color = 'gray') +
  geom_textsegment(aes(xend = FTSW_apres_arros, yend = pred, 
                       label = round(error, 3)),
               color = 'red', data = ISIDOR[3,], vjust = 1.1) +
  geom_point(aes(color = Variete), shape = 19, cex = 3) +
  geom_smooth(method = nls, formula = y ~ 2/(1 + exp(a * x)) - 1,
              method.args = list(start = list(a = -6)), se = FALSE,
              color = 'black', size = 0.75) +
  scale_color_manual(values = c("red3", "blue3"))+
  scale_y_continuous(limits = c(0, 1.0)) +
  scale_x_continuous(limits = c(0, 1)) +
  labs(title = "Isidor", 
       y = "Expansion folliaire totale relative",
       x = "FTSW",
       subtitle = paste0("y = 2/(1 + exp(", round(coef(mod), 3), "* x)) - 1",
                         "\n", "R^2 = ", round(Rsquared, 3),"  RMSE = ",
                         round(RMSE, 3), "   MSE = ", round(MSE, 3)))+
  theme(plot.title = element_text(hjust = 0, size = 14, face = "bold", 
                                  colour = "black"),
        plot.subtitle = element_text(hjust = 0,size=10, face = "italic", 
                                     colour = "black"),
        legend.position = "none")