R 中的 plot 与 ggplot2 对比,以及如何提取拟合参数

Plot vs ggplot2 in R and how to extract fit parameters

我在名为 t 的 data.frame 中有以下数据:
   DayNum   MeanVolume    StdDev    StdErr
1      13   207.0500  41.00045  5.125057
2      15   142.7625  27.87236  3.484045
3      18    77.5500  19.43928  2.429910
4      21    66.3750  20.56403  2.570504
5      26    67.0500  29.01576  3.626970
6      29    66.4750  25.94537  3.243171
7      33    76.9625  25.31374  3.164218
8      36    91.2875  37.01719  4.627149
9      40   102.0500  29.39898  3.674872
10     43   100.8250  24.22830  3.028538
11     47   120.5125  28.80592  3.600740
12     50   147.8875  35.82894  4.478617
13     54   126.7875  45.43204  5.679004
14     57   139.8500  56.01117  7.001397
15     60   179.1375  69.64526  8.705658
16     64   149.7625  39.10265  4.887831
17     68   229.5250 121.08411 15.135514
18     71   236.5125  76.23146  9.528933
19     75   243.2750 101.69474 12.711842
20     78   331.6750 141.25344 17.656680
21     82   348.2875 122.86359 15.357948
22     85   353.7750 187.24641 23.405801
23     89   385.4000 154.05826 19.257283
24     92   500.9875 263.43714 32.929642
25     95   570.2250 301.82686 37.728358
26     98   692.2250 344.71226 43.089032
27    102   692.8000 283.94120 35.492650
28    105   759.2000 399.19323 49.899153
29    109   898.2375 444.94289 55.617861
30    112   920.1000 515.79597 64.474496

我想用 t 中的 x = DayNum 对 y = MeanVolume 进行拟合。

这是我所做的:

拟合数据

# Log-linear fit: regress log(MeanVolume) on DayNum.
# Columns are referenced by bare name so that `data = t` is actually used
# and predict(model, newdata = ...) can be evaluated on new DayNum values.
model <- lm(log(MeanVolume) ~ DayNum, data = t)

绘制数据

# Scatter plot of mean volume against day number on fixed axis ranges.
plot(
  MeanVolume ~ DayNum,
  data = t,
  ylab = "Mean Volume (mm3)",
  xlim = c(0, 120),
  ylim = c(0, 1000)
)
# Error bars of +/- one StdErr, drawn as arrows with perpendicular heads at
# both ends (angle = 90, code = 3).
with(
  t,
  arrows(
    x0 = DayNum, y0 = MeanVolume - StdErr,
    x1 = DayNum, y1 = MeanVolume + StdErr,
    length = 0.01, angle = 90, code = 3
  )
)

创建拟合数据

# Back-transform the log-scale fitted values to the original volume scale
# and store them as a new column of t.
t[["pred"]] <- exp(predict(model))

绘制拟合曲线

# Overlay the back-transformed fit on the existing plot.
with(t, lines(DayNum, pred, col = "blue"))

另一方面,如果我改用 ggplot2,代码如下:

# Same data in ggplot2: connecting line, points, a Gaussian GLM smoother with
# a log link, and +/- StdErr error bars. Layer order is kept as in the
# original because it determines the drawing order.
ggplot(t, aes(x = DayNum, y = MeanVolume)) +
  geom_line() +
  geom_point(colour = "blue", size = 3) +
  geom_smooth(
    method = "glm",
    method.args = list(family = gaussian(link = "log"))
  ) +
  labs(title = "Data", x = "Days", y = "Mean Volume (mm3)") +
  geom_errorbar(
    aes(ymin = MeanVolume - StdErr, ymax = MeanVolume + StdErr),
    width = 0.2
  )

我得到下面这张图

如您所见,ggplot 的拟合曲线比 plot 的拟合曲线更贴合数据。这是为什么?另外,我还想得到指数拟合线的截距和斜率等拟合参数,如何从 ggplot 调用中提取它们?

对 y 做对数变换后的 lm 与采用高斯误差分布和 log 链接函数的 glm 并不相同:前者在对数尺度上最小化残差平方和,后者在原始尺度上最小化残差平方和(具体原因请参见 @Lyngbakr 评论中的链接)。

# Read the data from somet.txt (presumably the table shown in the question).
# read.table() already returns a data.frame, so no conversion is needed.
gz <- read.table("somet.txt")

# Model 1: ordinary least squares on the log-transformed response.
model_lm <- lm(log(MeanVolume) ~ DayNum, data = gz)
# Model 2: Gaussian GLM with a log link, fitted to the untransformed response.
model_glm <- glm(
  MeanVolume ~ DayNum,
  family = gaussian(link = "log"),
  data = gz
)

# Fitted values of both models expressed on the volume scale.
pred_lm <- exp(predict(model_lm))
pred_glm <- predict(model_glm, type = "response")

# Base-graphics scatter plot of the observed means on fixed axis ranges.
plot(
  MeanVolume ~ DayNum,
  data = gz,
  ylab = "Mean Volume (mm3)",
  xlim = c(0, 120),
  ylim = c(0, 1000)
)

with(gz, {
  # Error bars of +/- one StdErr with perpendicular heads at both ends.
  arrows(
    x0 = DayNum, y0 = MeanVolume - StdErr,
    x1 = DayNum, y1 = MeanVolume + StdErr,
    length = 0.01, angle = 90, code = 3
  )
  # Fitted curves: lm on log(y) in blue, log-link glm in red.
  lines(DayNum, pred_lm, col = "blue")
  lines(DayNum, pred_glm, col = "red")
})

legend(
  "topleft",
  legend = c("lm", "glm"),
  col = c("blue", "red"),
  lty = 1
)

关于问题的第二部分:

library(ggplot2)

# Keep the plot as an object so its computed layer data can be inspected.
# Layer order (line, points, smoother, error bars) is significant: the
# smoother must remain the third layer for build$data[[3]] to refer to it.
p <- ggplot(gz, aes(x = DayNum, y = MeanVolume)) +
  geom_line() +
  geom_point(colour = "blue", size = 3) +
  geom_smooth(
    method = "glm",
    method.args = list(family = gaussian(link = "log"))
  ) +
  labs(title = "Data", x = "Days", y = "Mean Volume (mm3)") +
  geom_errorbar(
    aes(ymin = MeanVolume - StdErr, ymax = MeanVolume + StdErr),
    width = 0.2
  )

从 ggplot 中提取数据可以使用:

# Build the plot to obtain the data computed for each layer.
build <- ggplot_build(p)

拟合曲线的数据位于 build$data[[3]] 中(geom_smooth 是图中的第三个图层):

# Redraw the smoother's computed curve (third layer's data) on top of the
# plot as a thick dashed red line.
p +
  geom_line(
    data = build$data[[3]],
    mapping = aes(x = x, y = y),
    linetype = 2,
    colour = "red",
    size = 1.5
  )

这些数据与 pred_glm 中的数据一致,只是更密集一些(数据点更多)。据我所知,无法仅凭这些预测值从 ggplot 中提取系数,但您始终可以像上面那样自行构建 glm 模型,然后用 coef(model_glm) 取得截距和斜率。