在 R 底图中绘制 GLM、R 平方和 p 值
Plot a GLM, R squared and p-value in R base plot
我有关于 1987-2018 年 30 年期间随变量(计数)变化的每月人数的数据。我将这段时间分成三个较短的时间段,以检查当时和现在的相关性是否大致相同,拟合一个模型,将 var 作为响应,将人数和时间段作为预测变量,以及响应的泊松分布。
我已经在 ggplot 中制作了这个脚本,但想让它在基本的 R 绘图模式下工作。我还想要三个图中每个图中的方程式、R 平方和 p 值。此脚本适用于 ggplot:
library(ggplot2)
p <- ggplot(df, aes(people,var)) + geom_point()
p + facet_grid(rows=vars(period))
p + facet_grid(rows=vars(period)) +
stat_smooth(method="glm", method.args = list(family = "poisson"))
summary(glm(var~people+period, family=poisson, data=df))
它使这个情节:
这是数据:
> dput(df)
structure(list(period = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), month = c("JAN",
"FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT",
"NOV", "DEC", "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL",
"AUG", "SEP", "OCT", "NOV", "DEC", "JAN", "FEB", "MAR", "APR",
"MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"), people = c(4068L,
7251L, 14384L, 20513L, 18748L, 17760L, 23433L, 22878L, 12815L,
8101L, 7477L, 5018L, 5063L, 11103L, 20946L, 32192L, 22665L, 29605L,
37590L, 36692L, 16474L, 8989L, 5729L, 3909L, 8391L, 18638L, 31223L,
40583L, 26889L, 40074L, 53218L, 52087L, 23873L, 13694L, 9354L,
9822L), var = c(2L, 1L, 3L, 5L, 3L, 0L, 4L, 5L, 1L, 1L, 0L, 1L,
0L, 2L, 1L, 1L, 4L, 1L, 3L, 4L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 6L,
1L, 2L, 2L, 3L, 1L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-36L))
我想你想要的在下面。不确定你对 R-squared 的意图,现在它被编码为拟合值和观察值之间的平方相关,但这不是线性回归,因此拟合度量在这里可能不同样合适.无论如何,您可以在那里进行任何您想要的计算。
par(mfrow=c(2,2))
with(subset(df, period == 1),
plot(var ~ people,
col=cols[1],
pch=shapes[1],
xlim = range(df$people),
ylim=range(e$fit),
main="Period 1"))
mod1 <- glm(var~people, family=poisson, data=subset(df, period==1))
b1 <- coef(mod1)
txt <- paste("log(mu)=", round(b1[1], 3), " + ", round(b1[2], 6), "People", sep="")
text(2000, 50, labels=txt, pos=4)
text(2000, 57.5, label=paste("R-squared = ", round(cor(mod1$fitted, mod1$y)^2, 3)), pos=4)
text(2000, 42.5, label=paste("p-value = ", round(summary(mod1)$coefficients[2,4], 3)), pos=4)
with(subset(e, period == 1),
lines(people, fit, col=cols[1]))
with(subset(df, period == 2),
plot(var ~ people,
col=cols[2],
pch=shapes[2],
xlim = range(df$people),
ylim=range(e$fit),
main="Period 2"))
mod2 <- glm(var~people, family=poisson, data=subset(df, period==2))
b2 <- coef(mod2)
txt <- paste("log(mu)=", round(b2[1], 3), " + ", round(b2[2], 6), "People", sep="")
text(2000, 50, labels=txt, pos=4)
text(2000, 57.5, label=paste("R-squared = ", round(cor(mod2$fitted, mod2$y)^2, 3)), pos=4)
text(2000, 42.5, label=paste("p-value = ", round(summary(mod2)$coefficients[2,4], 3)), pos=4)
with(subset(e, period == 2),
lines(people, fit, col=cols[2]))
with(subset(df, period == 3),
plot(var ~ people,
col=cols[3],
pch=shapes[3],
xlim = range(df$people),
ylim=range(e$fit),
main="Period 3"))
mod3 <- glm(var~people, family=poisson, data=subset(df, period==3))
b3 <- coef(mod3)
txt <- paste("log(mu)=", round(b3[1], 3), " + ", round(b3[2], 6), "People", sep="")
text(2000, 50, labels=txt, pos=4)
text(2000, 57.5, label=paste("R-squared = ", round(cor(mod3$fitted, mod3$y)^2, 3)), pos=4)
text(2000, 42.5, label=paste("p-value = ", round(summary(mod3)$coefficients[2,4], 3)), pos=4)
with(subset(e, period == 3),
lines(people, fit, col=cols[3]))
我有关于 1987-2018 年 30 年期间随变量(计数)变化的每月人数的数据。我将这段时间分成三个较短的时间段,以检查当时和现在的相关性是否大致相同,拟合一个模型,将 var 作为响应,将人数和时间段作为预测变量,以及响应的泊松分布。
我已经在 ggplot 中制作了这个脚本,但想让它在基本的 R 绘图模式下工作。我还想要三个图中每个图中的方程式、R 平方和 p 值。此脚本适用于 ggplot:
library(ggplot2)
p <- ggplot(df, aes(people,var)) + geom_point()
p + facet_grid(rows=vars(period))
p + facet_grid(rows=vars(period)) +
stat_smooth(method="glm", method.args = list(family = "poisson"))
summary(glm(var~people+period, family=poisson, data=df))
它使这个情节:
这是数据:
> dput(df)
structure(list(period = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), month = c("JAN",
"FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT",
"NOV", "DEC", "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL",
"AUG", "SEP", "OCT", "NOV", "DEC", "JAN", "FEB", "MAR", "APR",
"MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"), people = c(4068L,
7251L, 14384L, 20513L, 18748L, 17760L, 23433L, 22878L, 12815L,
8101L, 7477L, 5018L, 5063L, 11103L, 20946L, 32192L, 22665L, 29605L,
37590L, 36692L, 16474L, 8989L, 5729L, 3909L, 8391L, 18638L, 31223L,
40583L, 26889L, 40074L, 53218L, 52087L, 23873L, 13694L, 9354L,
9822L), var = c(2L, 1L, 3L, 5L, 3L, 0L, 4L, 5L, 1L, 1L, 0L, 1L,
0L, 2L, 1L, 1L, 4L, 1L, 3L, 4L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 6L,
1L, 2L, 2L, 3L, 1L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-36L))
我想你想要的在下面。不确定你对 R-squared 的意图,现在它被编码为拟合值和观察值之间的平方相关,但这不是线性回归,因此拟合度量在这里可能不同样合适.无论如何,您可以在那里进行任何您想要的计算。
par(mfrow=c(2,2))
with(subset(df, period == 1),
plot(var ~ people,
col=cols[1],
pch=shapes[1],
xlim = range(df$people),
ylim=range(e$fit),
main="Period 1"))
mod1 <- glm(var~people, family=poisson, data=subset(df, period==1))
b1 <- coef(mod1)
txt <- paste("log(mu)=", round(b1[1], 3), " + ", round(b1[2], 6), "People", sep="")
text(2000, 50, labels=txt, pos=4)
text(2000, 57.5, label=paste("R-squared = ", round(cor(mod1$fitted, mod1$y)^2, 3)), pos=4)
text(2000, 42.5, label=paste("p-value = ", round(summary(mod1)$coefficients[2,4], 3)), pos=4)
with(subset(e, period == 1),
lines(people, fit, col=cols[1]))
with(subset(df, period == 2),
plot(var ~ people,
col=cols[2],
pch=shapes[2],
xlim = range(df$people),
ylim=range(e$fit),
main="Period 2"))
mod2 <- glm(var~people, family=poisson, data=subset(df, period==2))
b2 <- coef(mod2)
txt <- paste("log(mu)=", round(b2[1], 3), " + ", round(b2[2], 6), "People", sep="")
text(2000, 50, labels=txt, pos=4)
text(2000, 57.5, label=paste("R-squared = ", round(cor(mod2$fitted, mod2$y)^2, 3)), pos=4)
text(2000, 42.5, label=paste("p-value = ", round(summary(mod2)$coefficients[2,4], 3)), pos=4)
with(subset(e, period == 2),
lines(people, fit, col=cols[2]))
with(subset(df, period == 3),
plot(var ~ people,
col=cols[3],
pch=shapes[3],
xlim = range(df$people),
ylim=range(e$fit),
main="Period 3"))
mod3 <- glm(var~people, family=poisson, data=subset(df, period==3))
b3 <- coef(mod3)
txt <- paste("log(mu)=", round(b3[1], 3), " + ", round(b3[2], 6), "People", sep="")
text(2000, 50, labels=txt, pos=4)
text(2000, 57.5, label=paste("R-squared = ", round(cor(mod3$fitted, mod3$y)^2, 3)), pos=4)
text(2000, 42.5, label=paste("p-value = ", round(summary(mod3)$coefficients[2,4], 3)), pos=4)
with(subset(e, period == 3),
lines(people, fit, col=cols[3]))