R 中的 plot 与 ggplot2 的对比,以及如何提取拟合参数
Plot vs ggplot2 in R and how to extract fit parameters
我在名为 t 的 data.frame 中有以下数据:
DayNum MeanVolume StdDev StdErr
1 13 207.0500 41.00045 5.125057
2 15 142.7625 27.87236 3.484045
3 18 77.5500 19.43928 2.429910
4 21 66.3750 20.56403 2.570504
5 26 67.0500 29.01576 3.626970
6 29 66.4750 25.94537 3.243171
7 33 76.9625 25.31374 3.164218
8 36 91.2875 37.01719 4.627149
9 40 102.0500 29.39898 3.674872
10 43 100.8250 24.22830 3.028538
11 47 120.5125 28.80592 3.600740
12 50 147.8875 35.82894 4.478617
13 54 126.7875 45.43204 5.679004
14 57 139.8500 56.01117 7.001397
15 60 179.1375 69.64526 8.705658
16 64 149.7625 39.10265 4.887831
17 68 229.5250 121.08411 15.135514
18 71 236.5125 76.23146 9.528933
19 75 243.2750 101.69474 12.711842
20 78 331.6750 141.25344 17.656680
21 82 348.2875 122.86359 15.357948
22 85 353.7750 187.24641 23.405801
23 89 385.4000 154.05826 19.257283
24 92 500.9875 263.43714 32.929642
25 95 570.2250 301.82686 37.728358
26 98 692.2250 344.71226 43.089032
27 102 692.8000 283.94120 35.492650
28 105 759.2000 399.19323 49.899153
29 109 898.2375 444.94289 55.617861
30 112 920.1000 515.79597 64.474496
我正在尝试将 x = DayNum 拟合到 t 中的 y = MeanVolume。
这是我所做的:
# Fit data: least-squares line on log(MeanVolume). Columns are referenced by
# name (not t$...) so that `data = t` is actually used and
# predict(model, newdata = ...) works.
model <- lm(log(MeanVolume) ~ DayNum, data = t)
# Plot data: mean volume against day; arrows() with angle = 90 and code = 3
# draws flat-capped bars from MeanVolume - StdErr to MeanVolume + StdErr.
plot(MeanVolume ~ DayNum, data = t, ylab = "Mean Volume (mm3)", xlim = c(0, 120), ylim = c(0, 1000))
arrows(t$DayNum, t$MeanVolume - t$StdErr, t$DayNum, t$MeanVolume + t$StdErr, length = 0.01, angle = 90, code = 3)
# Create fit data: back-transform the log-scale predictions with exp()
t$pred <- exp(predict(model))
# Plot fit
lines(t$DayNum, t$pred, col = "blue")
另一方面,如果我改用 ggplot2,通过以下代码绘图:
# Same data drawn with ggplot2: connecting line, blue points, a smoother fitted
# by glm() with a Gaussian family and log link, and MeanVolume +/- StdErr bars.
ggplot(t, aes(DayNum, MeanVolume)) +
  geom_line() +
  geom_point(color = "blue", size = 3) +
  geom_smooth(
    method = "glm",
    method.args = list(family = gaussian(link = "log"))
  ) +
  labs(title = "Data", x = "Days", y = "Mean Volume (mm3)") +
  geom_errorbar(
    aes(ymin = MeanVolume - StdErr, ymax = MeanVolume + StdErr),
    width = 0.2
  )
我得到下面这张图:
如您所见,ggplot 给出的拟合曲线比 plot 的拟合曲线更好。为什么?我还想得到拟合参数,例如指数拟合曲线的截距和斜率。如何从 ggplot 调用中提取它们?
对 y 取对数后再用 lm 拟合,与使用高斯误差分布和 log 链接函数的 glm 拟合并不相同:前者在对数尺度上最小化残差平方和,后者在原始尺度上最小化残差平方和(具体原因请参见 @Lyngbakr 评论中的链接)。
# Load the table from disk and make sure it is a data.frame.
gz <- as.data.frame(read.table("somet.txt"))

# Two models of the same trend:
#   model_lm  - least squares on log(MeanVolume)
#   model_glm - Gaussian GLM with a log link, fitted to MeanVolume itself
model_lm <- lm(log(MeanVolume) ~ DayNum, data = gz)
model_glm <- glm(MeanVolume ~ DayNum, family = gaussian(link = "log"), data = gz)

# Fitted values of both models on the original MeanVolume scale.
pred_lm <- exp(predict(model_lm))
pred_glm <- predict(model_glm, type = "response")

# Scatter plot with MeanVolume +/- StdErr bars, then both fitted curves.
plot(
  MeanVolume ~ DayNum,
  data = gz,
  ylab = "Mean Volume (mm3)",
  xlim = c(0, 120),
  ylim = c(0, 1000)
)
arrows(
  x0 = gz$DayNum, y0 = gz$MeanVolume - gz$StdErr,
  x1 = gz$DayNum, y1 = gz$MeanVolume + gz$StdErr,
  length = 0.01, angle = 90, code = 3
)
lines(gz$DayNum, pred_lm, col = "blue")
lines(gz$DayNum, pred_glm, col = "red")
legend("topleft", legend = c("lm", "glm"), col = c("blue", "red"), lty = 1)
关于问题的第二部分:
library(ggplot2)
# Build the plot and keep it in `p` so it can be inspected and extended later.
# geom_smooth() is deliberately the third layer: the code that follows reads
# its computed curve from position 3 of the built plot's data list.
p <- ggplot(data = gz, mapping = aes(x = DayNum, y = MeanVolume)) +
  geom_line() +
  geom_point(size = 3, color = "blue") +
  geom_smooth(method = "glm", method.args = list(family = gaussian(link = "log"))) +
  labs(x = "Days", y = "Mean Volume (mm3)", title = "Data") +
  geom_errorbar(aes(ymin = MeanVolume - StdErr, ymax = MeanVolume + StdErr), width = 0.2)
从 ggplot 中提取数据可以使用:
# ggplot_build() evaluates the plot; build$data holds one data frame per layer.
build <- ggplot_build(p)
拟合曲线的数据位于 build$data[[3]] 中(geom_smooth 是第三个图层):
# Overlay the extracted smoother curve as a dashed red line. `linewidth`
# replaces `size` for line geoms (ggplot2 >= 3.4.0), where `size` is deprecated.
p + geom_line(data = build$data[[3]], aes(x = x, y = y), linetype = 2, color = "red", linewidth = 1.5)
此数据与 pred_glm 中的数据相同,只是更密集一些(数据点更多)。据我所知,无法仅从 ggplot 的预测结果中提取系数,但您始终可以如上所述自行构建 glm 模型,再用 coef(model_glm) 得到截距和斜率(位于 log 链接尺度上)。
我在名为 t 的 data.frame 中有以下数据:
DayNum MeanVolume StdDev StdErr
1 13 207.0500 41.00045 5.125057
2 15 142.7625 27.87236 3.484045
3 18 77.5500 19.43928 2.429910
4 21 66.3750 20.56403 2.570504
5 26 67.0500 29.01576 3.626970
6 29 66.4750 25.94537 3.243171
7 33 76.9625 25.31374 3.164218
8 36 91.2875 37.01719 4.627149
9 40 102.0500 29.39898 3.674872
10 43 100.8250 24.22830 3.028538
11 47 120.5125 28.80592 3.600740
12 50 147.8875 35.82894 4.478617
13 54 126.7875 45.43204 5.679004
14 57 139.8500 56.01117 7.001397
15 60 179.1375 69.64526 8.705658
16 64 149.7625 39.10265 4.887831
17 68 229.5250 121.08411 15.135514
18 71 236.5125 76.23146 9.528933
19 75 243.2750 101.69474 12.711842
20 78 331.6750 141.25344 17.656680
21 82 348.2875 122.86359 15.357948
22 85 353.7750 187.24641 23.405801
23 89 385.4000 154.05826 19.257283
24 92 500.9875 263.43714 32.929642
25 95 570.2250 301.82686 37.728358
26 98 692.2250 344.71226 43.089032
27 102 692.8000 283.94120 35.492650
28 105 759.2000 399.19323 49.899153
29 109 898.2375 444.94289 55.617861
30 112 920.1000 515.79597 64.474496
我正在尝试将 x = DayNum 拟合到 t 中的 y = MeanVolume。
这是我所做的:
# Fit data: least-squares line on log(MeanVolume). Columns are referenced by
# name (not t$...) so that `data = t` is actually used and
# predict(model, newdata = ...) works.
model <- lm(log(MeanVolume) ~ DayNum, data = t)
# Plot data: mean volume against day; arrows() with angle = 90 and code = 3
# draws flat-capped bars from MeanVolume - StdErr to MeanVolume + StdErr.
plot(MeanVolume ~ DayNum, data = t, ylab = "Mean Volume (mm3)", xlim = c(0, 120), ylim = c(0, 1000))
arrows(t$DayNum, t$MeanVolume - t$StdErr, t$DayNum, t$MeanVolume + t$StdErr, length = 0.01, angle = 90, code = 3)
# Create fit data: back-transform the log-scale predictions with exp()
t$pred <- exp(predict(model))
# Plot fit
lines(t$DayNum, t$pred, col = "blue")
另一方面,如果我改用 ggplot2,通过以下代码绘图:
# Same data drawn with ggplot2: connecting line, blue points, a smoother fitted
# by glm() with a Gaussian family and log link, and MeanVolume +/- StdErr bars.
ggplot(t, aes(DayNum, MeanVolume)) +
  geom_line() +
  geom_point(color = "blue", size = 3) +
  geom_smooth(
    method = "glm",
    method.args = list(family = gaussian(link = "log"))
  ) +
  labs(title = "Data", x = "Days", y = "Mean Volume (mm3)") +
  geom_errorbar(
    aes(ymin = MeanVolume - StdErr, ymax = MeanVolume + StdErr),
    width = 0.2
  )
我得到下面这张图:
如您所见,ggplot 给出的拟合曲线比 plot 的拟合曲线更好。为什么?我还想得到拟合参数,例如指数拟合曲线的截距和斜率。如何从 ggplot 调用中提取它们?
对 y 取对数后再用 lm 拟合,与使用高斯误差分布和 log 链接函数的 glm 拟合并不相同:前者在对数尺度上最小化残差平方和,后者在原始尺度上最小化残差平方和(具体原因请参见 @Lyngbakr 评论中的链接)。
# Load the table from disk and make sure it is a data.frame.
gz <- as.data.frame(read.table("somet.txt"))

# Two models of the same trend:
#   model_lm  - least squares on log(MeanVolume)
#   model_glm - Gaussian GLM with a log link, fitted to MeanVolume itself
model_lm <- lm(log(MeanVolume) ~ DayNum, data = gz)
model_glm <- glm(MeanVolume ~ DayNum, family = gaussian(link = "log"), data = gz)

# Fitted values of both models on the original MeanVolume scale.
pred_lm <- exp(predict(model_lm))
pred_glm <- predict(model_glm, type = "response")

# Scatter plot with MeanVolume +/- StdErr bars, then both fitted curves.
plot(
  MeanVolume ~ DayNum,
  data = gz,
  ylab = "Mean Volume (mm3)",
  xlim = c(0, 120),
  ylim = c(0, 1000)
)
arrows(
  x0 = gz$DayNum, y0 = gz$MeanVolume - gz$StdErr,
  x1 = gz$DayNum, y1 = gz$MeanVolume + gz$StdErr,
  length = 0.01, angle = 90, code = 3
)
lines(gz$DayNum, pred_lm, col = "blue")
lines(gz$DayNum, pred_glm, col = "red")
legend("topleft", legend = c("lm", "glm"), col = c("blue", "red"), lty = 1)
关于问题的第二部分:
library(ggplot2)
# Build the plot and keep it in `p` so it can be inspected and extended later.
# geom_smooth() is deliberately the third layer: the code that follows reads
# its computed curve from position 3 of the built plot's data list.
p <- ggplot(data = gz, mapping = aes(x = DayNum, y = MeanVolume)) +
  geom_line() +
  geom_point(size = 3, color = "blue") +
  geom_smooth(method = "glm", method.args = list(family = gaussian(link = "log"))) +
  labs(x = "Days", y = "Mean Volume (mm3)", title = "Data") +
  geom_errorbar(aes(ymin = MeanVolume - StdErr, ymax = MeanVolume + StdErr), width = 0.2)
从 ggplot 中提取数据可以使用:
# ggplot_build() evaluates the plot; build$data holds one data frame per layer.
build <- ggplot_build(p)
拟合曲线的数据位于 build$data[[3]] 中(geom_smooth 是第三个图层):
# Overlay the extracted smoother curve as a dashed red line. `linewidth`
# replaces `size` for line geoms (ggplot2 >= 3.4.0), where `size` is deprecated.
p + geom_line(data = build$data[[3]], aes(x = x, y = y), linetype = 2, color = "red", linewidth = 1.5)
此数据与 pred_glm 中的数据相同,只是更密集一些(数据点更多)。据我所知,无法仅从 ggplot 的预测结果中提取系数,但您始终可以如上所述自行构建 glm 模型,再用 coef(model_glm) 得到截距和斜率(位于 log 链接尺度上)。