绘制对数对数线性模型预测值的 95% 置信区间

Plotting 95% confidence intervals of log-log linear model predicted values

我将长度和体重数据拟合到对数对数线性模型中,并创建了一条回归线,其中响应已返回转换为原始比例。

接下来,我想在散点图中添加两条线,分别表示上下 95% 置信区间。

我不是 R 或统计方面的专家,但我正在努力变得更好!执行此操作的最佳方法是什么?任何帮助将不胜感激。

注意:此示例中使用的长度和重量数据来自 AFS 包中的 'ChinookArg' 数据框。

library(ggplot2)

df <- data.frame(tl = c(120.1, 115, 111.2, 110.2, 110, 109.7, 105, 100.1, 98, 92.1, 
                        99, 97.9, 94.9, 92.2, 94.9, 92.7, 92.9, 89.9, 88.7, 92, 87.7, 
                        85.1, 85.1, 82.9, 82.9, 83.8, 82.2, 81, 78.8, 78.8, 74.9, 68.1, 
                        66.8, 59.9, 113.8, 112.9, 108.1, 109.7, 103.7, 103.2, 99.9, 99, 
                        103, 103, 99.4, 97.9, 97.2, 96.7, 95.1, 92.2, 93, 92.2, 91.2, 
                        88.1, 94.6, 94.3, 92.5, 88.1, 89.8, 88.8, 87.9, 86, 87.4, 68.5, 
                        80.5, 79, 77.6, 72.8, 77.3, 78.8, 74.5, 72.6, 73.3, 74, 75.2, 
                        76.6, 72, 70.6, 71.8, 70.2, 68.2, 67.3, 67.7, 65.9, 66.3, 64.7, 
                        63, 62.7, 64.2, 61.3, 64.2, 60.1, 59.4, 57.7, 57.4, 56.5, 54.1, 
                        54.1, 56, 52, 50.8, 49.3, 43.8, 39.8, 39, 35.4, 36.9, 32.1, 31.9, 
                        29.2, 25.2, 18),
                 w = c(17.9, 17.2, 16.8, 15.8, 14.3, 13.8, 12.8, 11.7, 12.8, 14.8, 
                       9.7, 7.3, 7.8, 9.1, 11.8, 11.3, 11.9, 11.8, 10.8, 5.9, 5.9, 9, 
                       9.8, 8.7, 7.8, 5.7, 6.7, 8.7, 8.4, 7.9, 6.5, 7.3, 5.2, 3.9, 15, 
                       16, 13.3, 11.3, 10.9, 9.8, 9.9, 10.3, 12.6, 10, 10.2, 8.3, 7.9, 
                       8.9, 9.4, 8.9, 8.1, 8.3, 8.3, 8.3, 6.2, 6.6, 6.6, 8.3, 6.3, 6.3, 
                       6.8, 6.8, 5.5, 5, 6.1, 6.6, 7.2, 6.1, 4.1, 4.8, 4.6, 5, 3.7, 
                       3, 2.5, 3.1, 2.4, 2.5, 3, 3.7, 3.5, 2.9, 2.4, 2.3, 3.5, 3.6, 
                       3.7, 3, 2.5, 2.4, 1.6, 1.4, 2.5, 2.6, 1.9, 1.5, 1.8, 2.8, 3.1, 
                       1.4, 1.8, 1, 0.7, 0.7, 0.7, 0.5, 3, 2.8, 0.3, 0.3, 0.3, 0.1))


model<- lm(log(w)~(log(tl)), data = df)

nmodel<- data.frame(tl = seq(from = min(df$tl), to = max(df$tl), length= 100))
nmodel$predicted<- exp(predict(model, nmodel, type = "response"))

plot <- ggplot()+
  geom_line(aes(x = tl, y = predicted), col = "black", data = nmodel)+
  geom_point(data = df, aes(x=tl, y=w))+
  xlab("Length")+
  ylab("Weight")
plot

只需将 interval 参数添加到 predict() 并指定您想要的置信区间。

nmodel<- data.frame(tl = seq(from = min(df$tl), to = max(df$tl), length= 100))
model_preds <- exp(predict(model, nmodel, type = "response", interval = "confidence"))

nmodel <- cbind(nmodel, model_preds)

plot <- ggplot()+
  geom_line(aes(x = tl, y = fit), col = "black", data = nmodel)+
  geom_line(aes(x = tl, y = lwr), col = "red", data = nmodel)+
  geom_line(aes(x = tl, y = upr), col = "red", data = nmodel)+
  geom_point(data = df, aes(x=tl, y=w))+
  xlab("Length")+
  ylab("Weight")
plot

请注意,我删除了 predicted 列,因为当您 运行 如上所示的 predict() 函数时,它还提供了一个 fit 列,相当于同样的事情。