使用 predict() 函数绘制 CI 区间时出现问题
Problem when plotting CI interval using the predict() function
我正在尝试绘制具有 95% 置信区间的线性模型的结果,如下所示:
fem:
+---------------------+------------+-------------------+--------+-------------------+--------------------+-------------------+---------------------+--------------------+
| "Sitio" | "Zona" | "ID" | "Wg_g" | "GSI" | "K" | "Klog" | "Wglog" | "GSIlog" |
+---------------------+------------+-------------------+--------+-------------------+--------------------+-------------------+---------------------+--------------------+
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -27" | 2.692 | 9.15646258503401 | 0.0261364929449249 | -1.58275268748418 | 0.430075055551939 | 0.961727725139782 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -29" | 1.162 | 6.24731182795699 | 0.0255144032921811 | -1.59321458410006 | 0.0652061280543119 | 0.795693183836396 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -30" | 2.669 | 12.0769230769231 | 0.0257763522379356 | -1.58877854218143 | 0.426348573787508 | 1.0819563001024 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -32" | 2.104 | 8.99145299145299 | 0.0248620897755187 | -1.60446236966734 | 0.323045735481701 | 0.953829878071559 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -33" | 2.52 | 10.9565217391304 | 0.0259964554398148 | -1.58508586310111 | 0.401400540781544 | 1.03967270476395 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -34" | 1.434 | 5.64566929133858 | 0.0278303401108612 | -1.5554814861788 | 0.156549151331781 | 0.751715434711843 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -36" | 0.253 | 1.28426395939086 | 0.0244916125551217 | -1.61098261950021 | -0.596879478824182 | 0.108654295014225 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -38" | 0.302 | 1.5978835978836 | 0.0259259259259259 | -1.58626572414473 | -0.519993057042849 | 0.203545138783906 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -39" | 4.357 | 12.342776203966 | 0.0272580768461556 | -1.56450478843405 | 0.639187559935754 | 1.09141285454793 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -40" | 2.276 | 8.75384615384615 | 0.026 | -1.58502665202918 | 0.357172257723034 | 0.942198909752216 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -41" | 3.358 | 11.2307692307692 | 0.0244073065190365 | -1.6124801447312 | 0.52608069180203 | 1.0504095034776 |
| "Las Madres" | "Butrones" | "L.gibbosus -05" | 0.027 | 0.673316708229426 | 0.0176666769465286 | -1.75284513241212 | -1.56863623584101 | -0.171780608461195 |
| "Las Madres" | "Butrones" | "L.gibbosus -10" | 0.03 | 0.761421319796954 | 0.0157570376769167 | -1.80252542653517 | -1.52287874528034 | -0.118374967105912 |
| "Las Madres" | "Butrones" | "L.gibbosus -21" | 0.183 | 1.04214123006834 | 0.0192401878876662 | -1.71579069122865 | -0.737548910269571 | 0.0179265781603458 |
| "Las Madres" | "Butrones" | "L.gibbosus -23" | 1.143 | 5.94383775351014 | 0.0224289254993439 | -1.64919153162806 | 0.0580462303952817 | 0.774066946156802 |
| "Las Madres" | "Butrones" | "L.gibbosus -25" | 0.793 | 5.98490566037736 | 0.0194432052967693 | -1.71123213817768 | -0.100726812682396 | 0.777057309044777 |
| "Las Madres" | "Butrones" | "L.gibbosus -26" | 0.989 | 3.81853281853282 | 0.0153694695871428 | -1.81334112009634 | -0.0048037084028206 | 0.581896527515928 |
| "Las Madres" | "Butrones" | "L.gibbosus -27" | 0.069 | 0.745945945945946 | 0.0187611933335902 | -1.72673954113229 | -1.16115090926274 | -0.127292642001777 |
+---------------------+------------+-------------------+--------+-------------------+--------------------+-------------------+---------------------+--------------------+
# Fit a simple linear regression of Wglog on Klog using the fem data.
lm1 <- lm(Wglog ~ Klog, data = fem)
# Evenly spaced grid spanning the observed range of Klog, with as many points
# as there are observations.
newx <- seq(min(fem$Klog),max(fem$Klog),length.out = length(fem$Klog))
# Request predictions with a 95% confidence interval. The data frame passed as
# new data has a single column, and that column is named x.
pred1 <- predict(lm1, new=data.frame(x=newx),level=.95,interval="confidence")
但是预测值没有意义,因为它们完全不稳定并且有些混乱:
# Scatter plot of Wglog against Klog; the x axis gets no label and no ticks.
plot(fem$Wglog ~ fem$Klog,
ylab = "Log gonad weight",
xlab = "",
xaxt = "n",
ylim = c(-3,3),
pch = c(4,20),
font.lab = 2)
abline(lm1, col = "grey", lwd = 2) # grey line drawn from the coefficients of lm1
lines(x = newx, y = as.vector(pred1$fit[,1]), col="blue", lty=2, lwd = 2) # intended to coincide with the abline() line above
lines(x = newx, y = as.vector(pred1$fit[,2]), col="black", lty=2, lwd = 2) # intended as the limits of the confidence interval
lines(x = newx, y = as.vector(pred1$fit[,3]), col="black", lty=2, lwd = 2)
您可以在此处查看得到的图
如果我对预测值进行排序,它们更有意义,但无论如何它们似乎都是错误的:
# Second attempt: the same three curves, but each set of values is sorted into
# increasing order before being drawn against newx.
lines(x = newx, y = sort(as.vector(pred1$fit[,1])), col="blue", lty=2, lwd = 2)
lines(x = newx, y = sort(as.vector(pred1$fit[,2])), col="black", lty=2, lwd = 2)
lines(x = newx, y = sort(as.vector(pred1$fit[,3])), col="black", lty=2, lwd = 2)
以及第二张图
有人知道我做错了什么吗?非常感谢!
这一行
# Call as originally written: the new data frame's only column is named x.
pred1 <- predict(lm1, new=data.frame(x=newx),level=.95,interval="confidence")
应该是
# Corrected call: the new data frame's column is named Klog, which is the
# predictor in the model formula.
pred1 <- predict(lm1, new=data.frame(Klog=newx),level=.95,interval="confidence")
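因为您提供的新数据框中没有与模型相同的变量(即没有名为 Klog 的变量),所以 predict() 实际上忽略了 newx,返回的只是模型对原始数据的预测值;这些值按原始数据的行顺序排列,与已排序的 newx 并不对应,所以画出来的线显得杂乱。此外,predict() 函数的结果是一个矩阵,而不是列表或数据框,因此应写成 pred1[,1],而不是 pred1$fit[,1]。以下是有效的代码: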
# Reproducible example: fit Wglog ~ Klog and draw the fitted line together
# with its 95% confidence band on top of the observations.

# Only the two columns used by the model are reproduced here.
fem <- tibble::tribble(
  ~Klog,             ~Wglog,
  -1.58275268748418,  0.430075055551939,
  -1.59321458410006,  0.0652061280543119,
  -1.58877854218143,  0.426348573787508,
  -1.60446236966734,  0.323045735481701,
  -1.58508586310111,  0.401400540781544,
  -1.5554814861788,   0.156549151331781,
  -1.61098261950021, -0.596879478824182,
  -1.58626572414473, -0.519993057042849,
  -1.56450478843405,  0.639187559935754,
  -1.58502665202918,  0.357172257723034,
  -1.6124801447312,   0.52608069180203,
  -1.75284513241212, -1.56863623584101,
  -1.80252542653517, -1.52287874528034,
  -1.71579069122865, -0.737548910269571,
  -1.64919153162806,  0.0580462303952817,
  -1.71123213817768, -0.100726812682396,
  -1.81334112009634, -0.0048037084028206,
  -1.72673954113229, -1.16115090926274
)

lm1 <- lm(Wglog ~ Klog, data = fem)

# Evenly spaced grid across the observed range of Klog, with as many points
# as there are observations.
newx <- seq(min(fem$Klog), max(fem$Klog), length.out = nrow(fem))

# The column of the new data frame must carry the predictor's name (Klog).
# `newdata` is spelled out in full instead of relying on partial matching of
# `new`. With interval = "confidence" the result is a matrix whose columns
# are named "fit", "lwr" and "upr".
pred1 <- predict(
  lm1,
  newdata = data.frame(Klog = newx),
  interval = "confidence",
  level = 0.95
)

# Scatter plot of the observations; the x axis gets no label and no ticks.
# The two pch values are recycled along the rows, so the symbols simply
# alternate with row position.
plot(
  fem$Wglog ~ fem$Klog,
  ylab = "Log gonad weight",
  xlab = "",
  xaxt = "n",
  ylim = c(-3, 3),
  pch = c(4, 20),
  font.lab = 2
)

# Grey line drawn directly from the coefficients of the model.
abline(lm1, col = "grey", lwd = 2)

# Predicted values on the grid; this dashed blue line coincides with the
# abline() line above.
lines(x = newx, y = pred1[, "fit"], col = "blue", lty = 2, lwd = 2)

# Lower and upper limits of the 95% confidence interval.
lines(x = newx, y = pred1[, "lwr"], col = "black", lty = 2, lwd = 2)
lines(x = newx, y = pred1[, "upr"], col = "black", lty = 2, lwd = 2)
我正在尝试绘制具有 95% 置信区间的线性模型的结果,如下所示:
fem:
+---------------------+------------+-------------------+--------+-------------------+--------------------+-------------------+---------------------+--------------------+
| "Sitio" | "Zona" | "ID" | "Wg_g" | "GSI" | "K" | "Klog" | "Wglog" | "GSIlog" |
+---------------------+------------+-------------------+--------+-------------------+--------------------+-------------------+---------------------+--------------------+
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -27" | 2.692 | 9.15646258503401 | 0.0261364929449249 | -1.58275268748418 | 0.430075055551939 | 0.961727725139782 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -29" | 1.162 | 6.24731182795699 | 0.0255144032921811 | -1.59321458410006 | 0.0652061280543119 | 0.795693183836396 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -30" | 2.669 | 12.0769230769231 | 0.0257763522379356 | -1.58877854218143 | 0.426348573787508 | 1.0819563001024 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -32" | 2.104 | 8.99145299145299 | 0.0248620897755187 | -1.60446236966734 | 0.323045735481701 | 0.953829878071559 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -33" | 2.52 | 10.9565217391304 | 0.0259964554398148 | -1.58508586310111 | 0.401400540781544 | 1.03967270476395 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -34" | 1.434 | 5.64566929133858 | 0.0278303401108612 | -1.5554814861788 | 0.156549151331781 | 0.751715434711843 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -36" | 0.253 | 1.28426395939086 | 0.0244916125551217 | -1.61098261950021 | -0.596879478824182 | 0.108654295014225 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -38" | 0.302 | 1.5978835978836 | 0.0259259259259259 | -1.58626572414473 | -0.519993057042849 | 0.203545138783906 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -39" | 4.357 | 12.342776203966 | 0.0272580768461556 | -1.56450478843405 | 0.639187559935754 | 1.09141285454793 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -40" | 2.276 | 8.75384615384615 | 0.026 | -1.58502665202918 | 0.357172257723034 | 0.942198909752216 |
| "Tablas de Daimiel" | "Tablazo" | "L. gibbosus -41" | 3.358 | 11.2307692307692 | 0.0244073065190365 | -1.6124801447312 | 0.52608069180203 | 1.0504095034776 |
| "Las Madres" | "Butrones" | "L.gibbosus -05" | 0.027 | 0.673316708229426 | 0.0176666769465286 | -1.75284513241212 | -1.56863623584101 | -0.171780608461195 |
| "Las Madres" | "Butrones" | "L.gibbosus -10" | 0.03 | 0.761421319796954 | 0.0157570376769167 | -1.80252542653517 | -1.52287874528034 | -0.118374967105912 |
| "Las Madres" | "Butrones" | "L.gibbosus -21" | 0.183 | 1.04214123006834 | 0.0192401878876662 | -1.71579069122865 | -0.737548910269571 | 0.0179265781603458 |
| "Las Madres" | "Butrones" | "L.gibbosus -23" | 1.143 | 5.94383775351014 | 0.0224289254993439 | -1.64919153162806 | 0.0580462303952817 | 0.774066946156802 |
| "Las Madres" | "Butrones" | "L.gibbosus -25" | 0.793 | 5.98490566037736 | 0.0194432052967693 | -1.71123213817768 | -0.100726812682396 | 0.777057309044777 |
| "Las Madres" | "Butrones" | "L.gibbosus -26" | 0.989 | 3.81853281853282 | 0.0153694695871428 | -1.81334112009634 | -0.0048037084028206 | 0.581896527515928 |
| "Las Madres" | "Butrones" | "L.gibbosus -27" | 0.069 | 0.745945945945946 | 0.0187611933335902 | -1.72673954113229 | -1.16115090926274 | -0.127292642001777 |
+---------------------+------------+-------------------+--------+-------------------+--------------------+-------------------+---------------------+--------------------+
# Fit a simple linear regression of Wglog on Klog using the fem data.
lm1 <- lm(Wglog ~ Klog, data = fem)
# Evenly spaced grid spanning the observed range of Klog, with as many points
# as there are observations.
newx <- seq(min(fem$Klog),max(fem$Klog),length.out = length(fem$Klog))
# Request predictions with a 95% confidence interval. The data frame passed as
# new data has a single column, and that column is named x.
pred1 <- predict(lm1, new=data.frame(x=newx),level=.95,interval="confidence")
但是预测值没有意义,因为它们完全不稳定并且有些混乱:
# Scatter plot of Wglog against Klog; the x axis gets no label and no ticks.
plot(fem$Wglog ~ fem$Klog,
ylab = "Log gonad weight",
xlab = "",
xaxt = "n",
ylim = c(-3,3),
pch = c(4,20),
font.lab = 2)
abline(lm1, col = "grey", lwd = 2) # grey line drawn from the coefficients of lm1
lines(x = newx, y = as.vector(pred1$fit[,1]), col="blue", lty=2, lwd = 2) # intended to coincide with the abline() line above
lines(x = newx, y = as.vector(pred1$fit[,2]), col="black", lty=2, lwd = 2) # intended as the limits of the confidence interval
lines(x = newx, y = as.vector(pred1$fit[,3]), col="black", lty=2, lwd = 2)
您可以在此处查看得到的图
如果我对预测值进行排序,它们更有意义,但无论如何它们似乎都是错误的:
# Second attempt: the same three curves, but each set of values is sorted into
# increasing order before being drawn against newx.
lines(x = newx, y = sort(as.vector(pred1$fit[,1])), col="blue", lty=2, lwd = 2)
lines(x = newx, y = sort(as.vector(pred1$fit[,2])), col="black", lty=2, lwd = 2)
lines(x = newx, y = sort(as.vector(pred1$fit[,3])), col="black", lty=2, lwd = 2)
以及第二张图
有人知道我做错了什么吗?非常感谢!
这一行
# Call as originally written: the new data frame's only column is named x.
pred1 <- predict(lm1, new=data.frame(x=newx),level=.95,interval="confidence")
应该是
# Corrected call: the new data frame's column is named Klog, which is the
# predictor in the model formula.
pred1 <- predict(lm1, new=data.frame(Klog=newx),level=.95,interval="confidence")
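因为您提供的新数据框中没有与模型相同的变量(即没有名为 Klog 的变量),所以 predict() 实际上忽略了 newx,返回的只是模型对原始数据的预测值;这些值按原始数据的行顺序排列,与已排序的 newx 并不对应,所以画出来的线显得杂乱。此外,predict() 函数的结果是一个矩阵,而不是列表或数据框,因此应写成 pred1[,1],而不是 pred1$fit[,1]。以下是有效的代码: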
# Reproducible example: fit Wglog ~ Klog and draw the fitted line together
# with its 95% confidence band on top of the observations.

# Only the two columns used by the model are reproduced here.
fem <- tibble::tribble(
  ~Klog,             ~Wglog,
  -1.58275268748418,  0.430075055551939,
  -1.59321458410006,  0.0652061280543119,
  -1.58877854218143,  0.426348573787508,
  -1.60446236966734,  0.323045735481701,
  -1.58508586310111,  0.401400540781544,
  -1.5554814861788,   0.156549151331781,
  -1.61098261950021, -0.596879478824182,
  -1.58626572414473, -0.519993057042849,
  -1.56450478843405,  0.639187559935754,
  -1.58502665202918,  0.357172257723034,
  -1.6124801447312,   0.52608069180203,
  -1.75284513241212, -1.56863623584101,
  -1.80252542653517, -1.52287874528034,
  -1.71579069122865, -0.737548910269571,
  -1.64919153162806,  0.0580462303952817,
  -1.71123213817768, -0.100726812682396,
  -1.81334112009634, -0.0048037084028206,
  -1.72673954113229, -1.16115090926274
)

lm1 <- lm(Wglog ~ Klog, data = fem)

# Evenly spaced grid across the observed range of Klog, with as many points
# as there are observations.
newx <- seq(min(fem$Klog), max(fem$Klog), length.out = nrow(fem))

# The column of the new data frame must carry the predictor's name (Klog).
# `newdata` is spelled out in full instead of relying on partial matching of
# `new`. With interval = "confidence" the result is a matrix whose columns
# are named "fit", "lwr" and "upr".
pred1 <- predict(
  lm1,
  newdata = data.frame(Klog = newx),
  interval = "confidence",
  level = 0.95
)

# Scatter plot of the observations; the x axis gets no label and no ticks.
# The two pch values are recycled along the rows, so the symbols simply
# alternate with row position.
plot(
  fem$Wglog ~ fem$Klog,
  ylab = "Log gonad weight",
  xlab = "",
  xaxt = "n",
  ylim = c(-3, 3),
  pch = c(4, 20),
  font.lab = 2
)

# Grey line drawn directly from the coefficients of the model.
abline(lm1, col = "grey", lwd = 2)

# Predicted values on the grid; this dashed blue line coincides with the
# abline() line above.
lines(x = newx, y = pred1[, "fit"], col = "blue", lty = 2, lwd = 2)

# Lower and upper limits of the 95% confidence interval.
lines(x = newx, y = pred1[, "lwr"], col = "black", lty = 2, lwd = 2)
lines(x = newx, y = pred1[, "upr"], col = "black", lty = 2, lwd = 2)