在同一张图上添加多元回归线方程、R2 和 SSE
Adding Multiple Regression Line Equations, R2 and SSE on the same graph
在 R 中,我使用 stat_poly_eq() 在图上标注线性模型的方程,遇到了 2 个问题:
如何标注三个独立的方程:每组各一个,另一个对应全部数据?
如何在每个方程上添加相应的误差平方和 (SSE)?
如这里(here,原帖中的链接)所示,以下代码生成一个基于全部数据的总体方程:
# Simulate 200 points scattered around the line y = 5x (noise sd = 10).
x <- runif(200, 0, 100)
y <- 5 * x + rnorm(200, 0, 10)

# Rows 1-100 are GENDER 1, rows 101-200 are GENDER 2. The column is created in
# full here: assigning to df$GENDER[1:100] before the column exists only works
# when the partial vector happens to recycle evenly into nrow(df).
df <- data.frame(x, y, GENDER = rep(c(1, 2), each = 100))

# Straight-line model shared by the smoothers and the equation labels.
formula <- y ~ poly(x, 1, raw = TRUE)
library(ggplot2)
library(ggpmisc)

# Layers added to the scatter plot: shape scale, overall fit, per-gender fits
# and the equation / R^2 label.
my_features <- list(
  scale_shape_manual(values = c(16, 1)),
  # Overall fit across both genders, with a grey confidence band.
  geom_smooth(
    method = "lm", aes(group = 1),
    formula = formula, colour = "Black", fill = "grey70"
  ),
  # One fit per gender. `se` is a layer parameter, not an aesthetic: placed
  # inside aes() it is treated as an unknown aesthetic and the bands are drawn.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER)),
    formula = formula, colour = "Black", se = FALSE
  ),
  # Equation and R^2 label; no grouping aesthetic reaches this layer, so a
  # single label fitted to all of the data is produced.
  stat_poly_eq(
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~~")),
    formula = formula, parse = TRUE
  )
)

# shape is mapped once, in geom_point(); the global aes() previously contained
# a second, nested aes() call, which is not a valid mapping.
ggplot(df, aes(x = x, y = y)) +
  geom_point(aes(shape = factor(GENDER))) +
  my_features
误差平方和需要自行计算并添加到标签中,基于完整数据集的方程也需要单独指定位置。方法如下:
library(ggplot2)
library(ggpmisc)

# Error sum of squares (ESS) of the straight-line fit to `data`.
fit_ess <- function(data) {
  sum(residuals(lm(formula, data = data))^2)
}

# ESS is computed from the data being plotted. x and y are unseeded random
# draws, so constants copied from an earlier run would not match these fits.
ess_all <- fit_ess(df)
ess_by_gender <- vapply(split(df, df$GENDER), fit_ess, numeric(1))
ess_all
ess_by_gender

my_features <- list(
  scale_shape_manual(values = c(16, 1)),
  # Overall fit across both genders (black line, grey band).
  geom_smooth(
    method = "lm", aes(group = 1),
    formula = formula, colour = "Black", fill = "grey70"
  ),
  # Per-gender fits, coloured by gender, without confidence bands.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER), colour = factor(GENDER)),
    formula = formula, se = FALSE
  ),
  # Per-gender label: equation, R^2, then the matching ESS. split() orders the
  # ESS values by GENDER level, the same order as the per-group label rows.
  stat_poly_eq(
    aes(label = paste(
      paste(..eq.label.., ..rr.label.., sep = "~~~~"),
      paste("ESS", round(ess_by_gender), sep = "=="),
      sep = "~~~~"
    )),
    formula = formula, parse = TRUE
  )
)

ggplot(df, aes(x = x, y = y, shape = factor(GENDER), colour = factor(GENDER))) +
  geom_point(aes(shape = factor(GENDER))) +
  my_features +
  # Overall line and label, fitted to all rows (group = 1).
  geom_smooth(method = "lm", aes(group = 1), formula = formula, colour = "black") +
  # NOTE(review): label.y = 440 is in data units; newer ggpmisc releases may
  # expect npc (0-1) values for the default geom -- confirm against the
  # installed version if the label ends up outside the panel.
  stat_poly_eq(
    aes(
      group = 1,
      label = paste(
        ..eq.label.., ..rr.label.., paste0("ESS==", round(ess_all)),
        sep = "~~~~"
      )
    ),
    formula = formula, parse = TRUE, label.y = 440
  )
或者你可以复制一份数据集,使完整数据集本身成为因子的一个水平……不过 ESS 仍然需要自行添加到标签中。
# Simulate 200 points scattered around the line y = 5x (noise sd = 10).
x <- runif(200, 0, 100)
y <- 5 * x + rnorm(200, 0, 10)

# Rows 1-100 are GENDER 1, rows 101-200 are GENDER 2. The column is created in
# full rather than through partial assignment to a column that does not exist
# yet, which only works when the lengths happen to recycle evenly.
df1 <- data.frame(x, y, GENDER = rep(c(1, 2), each = 100))

# Second copy of every row, relabelled as level 3, so that GENDER == 3 stands
# for the full data set.
df2 <- df1
df2$GENDER <- 3
df <- rbind(df1, df2)
# ESS of the straight-line fit for each GENDER level (3 = full data). It is
# computed from the data frame being plotted: x and y were re-drawn at random,
# so constants taken from a different draw would not match these fits.
ess_by_group <- vapply(
  split(df, df$GENDER),
  function(d) sum(residuals(lm(formula, data = d))^2),
  numeric(1)
)

my_features <- list(
  # Third plotting character for the "full data" level.
  scale_shape_manual(values = c(16, 1, 2)),
  # One fit per GENDER level, coloured by level, without confidence bands.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER), colour = factor(GENDER)),
    formula = formula, se = FALSE
  ),
  # Per-level label: equation, R^2, then the matching ESS. split() orders the
  # ESS values by GENDER level, the same order as the per-group label rows.
  stat_poly_eq(
    aes(label = paste(
      paste(..eq.label.., ..rr.label.., sep = "~~~~"),
      paste("ESS", round(ess_by_group), sep = "=="),
      sep = "~~~~"
    )),
    formula = formula, parse = TRUE
  )
)

ggplot(
  df,
  aes(
    x = x, y = y,
    shape = factor(GENDER), group = factor(GENDER), colour = factor(GENDER)
  )
) +
  geom_point(aes(shape = factor(GENDER))) +
  my_features
编辑:如果你想去掉第三组(全部数据)的散点符号,也可以这样做:
# ESS of the straight-line fit for each GENDER level (3 = full data), taken
# from the data frame being plotted instead of constants from an earlier run.
ess_by_group <- vapply(
  split(df, df$GENDER),
  function(d) sum(residuals(lm(formula, data = d))^2),
  numeric(1)
)

my_features <- list(
  # One fit per GENDER level, coloured by level, without confidence bands.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER), colour = factor(GENDER)),
    formula = formula, se = FALSE
  ),
  # Per-level label: equation, R^2, then the matching ESS.
  stat_poly_eq(
    aes(label = paste(
      paste(..eq.label.., ..rr.label.., sep = "~~~~"),
      paste("ESS", round(ess_by_group), sep = "=="),
      sep = "~~~~"
    )),
    formula = formula, parse = TRUE
  )
)

# Base plot holds only the fits and labels; points are added per subset below
# so that level 3 (the duplicated full data) gets no plotting character.
p <- ggplot(
  df,
  aes(
    x = x, y = y,
    shape = factor(GENDER), group = factor(GENDER), colour = factor(GENDER)
  )
) +
  my_features

p +
  # hue_pal() comes from the scales package, which this file never attaches,
  # so it is called with an explicit namespace.
  scale_color_manual(
    labels = c("Male", "Female", "Both"),
    values = scales::hue_pal()(3)
  ) +
  geom_point(data = df[df$GENDER == 1, ], aes(colour = factor(GENDER)), shape = 16) +
  geom_point(data = df[df$GENDER == 2, ], aes(colour = factor(GENDER)), shape = 1) +
  # Drop the point glyphs from the colour legend keys.
  guides(colour = guide_legend(title = "Gender", override.aes = list(shape = NA)))
在 R 中,我使用 stat_poly_eq() 在图上标注线性模型的方程,遇到了 2 个问题:
如何标注三个独立的方程:每组各一个,另一个对应全部数据?
如何在每个方程上添加相应的误差平方和 (SSE)?
如这里(here,原帖中的链接)所示,以下代码生成一个基于全部数据的总体方程:
# 200 simulated observations around y = 5x with Gaussian noise (sd = 10).
x <- runif(200, 0, 100)
y <- 5 * x + rnorm(200, 0, 10)

# GENDER is 1 for the first 100 rows and 2 for the remaining 100. Building the
# whole column at once avoids partial assignment into a column that does not
# exist yet, which depends on the lengths recycling evenly.
df <- data.frame(x, y, GENDER = rep(c(1, 2), each = 100))

# Degree-1 polynomial (straight line) used by every fit and label.
formula <- y ~ poly(x, 1, raw = TRUE)
library(ggplot2)
library(ggpmisc)

# Plot layers: shape scale, overall fit, per-gender fits, equation / R^2 label.
my_features <- list(
  scale_shape_manual(values = c(16, 1)),
  # Fit over all rows, drawn with a grey confidence band.
  geom_smooth(
    method = "lm", aes(group = 1),
    formula = formula, colour = "Black", fill = "grey70"
  ),
  # Fit per gender. `se` belongs to the layer, not to aes(): inside aes() it
  # is treated as an unknown aesthetic and the confidence bands stay on.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER)),
    formula = formula, colour = "Black", se = FALSE
  ),
  # With no grouping aesthetic reaching this layer, one equation / R^2 label
  # is produced for the whole data set.
  stat_poly_eq(
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~~")),
    formula = formula, parse = TRUE
  )
)

# The shape mapping lives in geom_point() only; the nested aes() call that was
# passed inside the global aes() is not a valid mapping and has been dropped.
ggplot(df, aes(x = x, y = y)) +
  geom_point(aes(shape = factor(GENDER))) +
  my_features
误差平方和需要自行计算并添加到标签中,基于完整数据集的方程也需要单独指定位置。方法如下:
library(ggplot2)
library(ggpmisc)

# Error sum of squares (ESS) of the straight-line fit to `data`.
fit_ess <- function(data) {
  sum(residuals(lm(formula, data = data))^2)
}

# Derive the ESS values from the plotted data. x and y are unseeded random
# draws, so numbers copied from a previous run would not match these fits.
ess_all <- fit_ess(df)
ess_by_gender <- vapply(split(df, df$GENDER), fit_ess, numeric(1))
ess_all
ess_by_gender

my_features <- list(
  scale_shape_manual(values = c(16, 1)),
  # Fit over all rows (black line, grey band).
  geom_smooth(
    method = "lm", aes(group = 1),
    formula = formula, colour = "Black", fill = "grey70"
  ),
  # Fit per gender, coloured by gender, confidence bands switched off.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER), colour = factor(GENDER)),
    formula = formula, se = FALSE
  ),
  # Label per gender: equation, R^2 and ESS. The ESS vector from split() is in
  # GENDER-level order, matching the order of the per-group label rows.
  stat_poly_eq(
    aes(label = paste(
      paste(..eq.label.., ..rr.label.., sep = "~~~~"),
      paste("ESS", round(ess_by_gender), sep = "=="),
      sep = "~~~~"
    )),
    formula = formula, parse = TRUE
  )
)

ggplot(df, aes(x = x, y = y, shape = factor(GENDER), colour = factor(GENDER))) +
  geom_point(aes(shape = factor(GENDER))) +
  my_features +
  # Overall line and label, fitted to all rows (group = 1).
  geom_smooth(method = "lm", aes(group = 1), formula = formula, colour = "black") +
  # NOTE(review): label.y = 440 is in data units; newer ggpmisc releases may
  # expect npc (0-1) values for the default geom -- confirm against the
  # installed version if the label ends up outside the panel.
  stat_poly_eq(
    aes(
      group = 1,
      label = paste(
        ..eq.label.., ..rr.label.., paste0("ESS==", round(ess_all)),
        sep = "~~~~"
      )
    ),
    formula = formula, parse = TRUE, label.y = 440
  )
或者你可以复制一份数据集,使完整数据集本身成为因子的一个水平……不过 ESS 仍然需要自行添加到标签中。
# 200 simulated observations around y = 5x with Gaussian noise (sd = 10).
x <- runif(200, 0, 100)
y <- 5 * x + rnorm(200, 0, 10)

# GENDER is 1 for rows 1-100 and 2 for rows 101-200, built as a full column
# instead of by partial assignment into a column that does not exist yet.
df1 <- data.frame(x, y, GENDER = rep(c(1, 2), each = 100))

# Stack a relabelled copy underneath: GENDER == 3 then means "all data".
df2 <- df1
df2$GENDER <- 3
df <- rbind(df1, df2)
# ESS of the straight-line fit per GENDER level (3 = full data), derived from
# the data frame being plotted. x and y were re-drawn at random, so constants
# from a different draw would not describe these fits.
ess_by_group <- vapply(
  split(df, df$GENDER),
  function(d) sum(residuals(lm(formula, data = d))^2),
  numeric(1)
)

my_features <- list(
  # Extra plotting character for the "full data" level.
  scale_shape_manual(values = c(16, 1, 2)),
  # Fit per GENDER level, coloured by level, confidence bands switched off.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER), colour = factor(GENDER)),
    formula = formula, se = FALSE
  ),
  # Label per level: equation, R^2 and ESS. The ESS vector from split() is in
  # GENDER-level order, matching the order of the per-group label rows.
  stat_poly_eq(
    aes(label = paste(
      paste(..eq.label.., ..rr.label.., sep = "~~~~"),
      paste("ESS", round(ess_by_group), sep = "=="),
      sep = "~~~~"
    )),
    formula = formula, parse = TRUE
  )
)

ggplot(
  df,
  aes(
    x = x, y = y,
    shape = factor(GENDER), group = factor(GENDER), colour = factor(GENDER)
  )
) +
  geom_point(aes(shape = factor(GENDER))) +
  my_features
编辑:如果你想去掉第三组(全部数据)的散点符号,也可以这样做:
# ESS of the straight-line fit per GENDER level (3 = full data), derived from
# the plotted data frame rather than constants from an earlier run.
ess_by_group <- vapply(
  split(df, df$GENDER),
  function(d) sum(residuals(lm(formula, data = d))^2),
  numeric(1)
)

my_features <- list(
  # Fit per GENDER level, coloured by level, confidence bands switched off.
  geom_smooth(
    method = "lm", aes(group = factor(GENDER), colour = factor(GENDER)),
    formula = formula, se = FALSE
  ),
  # Label per level: equation, R^2 and ESS.
  stat_poly_eq(
    aes(label = paste(
      paste(..eq.label.., ..rr.label.., sep = "~~~~"),
      paste("ESS", round(ess_by_group), sep = "=="),
      sep = "~~~~"
    )),
    formula = formula, parse = TRUE
  )
)

# The base plot carries only fits and labels. Points are added per subset
# below, so level 3 (the duplicated full data) is drawn without points.
p <- ggplot(
  df,
  aes(
    x = x, y = y,
    shape = factor(GENDER), group = factor(GENDER), colour = factor(GENDER)
  )
) +
  my_features

p +
  # hue_pal() is exported by scales, which is never attached in this file;
  # the namespace-qualified call avoids a "could not find function" error.
  scale_color_manual(
    labels = c("Male", "Female", "Both"),
    values = scales::hue_pal()(3)
  ) +
  geom_point(data = df[df$GENDER == 1, ], aes(colour = factor(GENDER)), shape = 16) +
  geom_point(data = df[df$GENDER == 2, ], aes(colour = factor(GENDER)), shape = 1) +
  # Remove the point glyphs from the colour legend keys.
  guides(colour = guide_legend(title = "Gender", override.aes = list(shape = NA)))