如何突出回归中的某些点
How to highlight certain points in a regression
我正在运行一个回归，并想突出显示残差值极端的点。
我先运行回归，并把残差作为一列添加到数据框中；然后在 dat$Outlier 列中设置要显示的数据标签，在 dat$Color 列中设置各点的颜色。
基本上，运行这段代码时，我希望把 dat$Outlier 中的文本显示为数据标签，并根据 dat$Color 为各点着色。
# Question setup: regress s on m, store the residuals, and precompute a label
# and a colour for every point whose residual is more than one standard
# deviation (of the residuals) above zero.
time <- as.POSIXct(
  c(
    "2015-05-01 10:00:00", "2015-05-01 10:05:00", "2015-05-01 10:10:00",
    "2015-05-01 10:15:00", "2015-05-01 10:20:00"
  ),
  tz = "GMT"
)
s <- rnorm(5)
m <- rnorm(5)
dat <- data.frame(t = time, s = s, m = m)

# s is the response and m the predictor.
reg <- lm(s ~ m, data = dat)
dat$resid <- resid(reg)

# Only large positive residuals are flagged (the comparison is one-sided).
dat$Outlier <- ifelse(dat$resid > sd(dat$resid) * 1, as.character(dat$t), "")
dat$Color <- ifelse(dat$resid > sd(dat$resid) * 1, "red", "black")
dat

# The predictor goes on the x axis and the response on the y axis; otherwise
# the line drawn by abline(reg) does not correspond to the plotted points.
plot(m, s)
abline(reg)
例如,如果我的 dat 数据框如下所示:
t s m resid Outlier Color
1 2015-05-01 10:00:00 -0.7141181 -0.54383561 -0.3645389 black
2 2015-05-01 10:05:00 -1.7444731 0.09249989 -0.4226707 black
3 2015-05-01 10:10:00 -1.1257465 0.12563139 0.2466758 black
4 2015-05-01 10:15:00 0.6201680 -0.47515076 1.0746872 2015-05-01 10:15:00 red
5 2015-05-01 10:20:00 -0.7979108 -0.60000735 -0.5341534 black
如何使第 4 行显示 2015-05-01 10:15:00 作为数据标签并将该点显示为红色?
您可以使用 points() 和 text()：
# Reproducible answer: fit s ~ m, flag the points whose residual exceeds one
# standard deviation of the residuals, then redraw those points in their
# highlight colour and label them with their timestamp.
set.seed(1)
time <- as.POSIXct(
  c(
    "2015-05-01 10:00:00", "2015-05-01 10:05:00", "2015-05-01 10:10:00",
    "2015-05-01 10:15:00", "2015-05-01 10:20:00"
  ),
  tz = "GMT"
)
s <- rnorm(5)
m <- rnorm(5)
dat <- data.frame(t = time, s = s, m = m)

# s is the response and m the predictor.
reg <- lm(s ~ m, data = dat)
dat$resid <- resid(reg)

# Compute the flag once; the comparison is one-sided, so only large positive
# residuals are marked.
is_outlier <- dat$resid > sd(dat$resid) * 1
dat$Outlier <- ifelse(is_outlier, as.character(dat$t), "")
dat$Color <- ifelse(is_outlier, "red", "black")
dat

# The predictor goes on the x axis and the response on the y axis so that the
# line drawn by abline(reg) matches the plotted points.
plot(dat$m, dat$s, xlab = "m", ylab = "s", lwd = 3)
abline(reg)

# Overplot the flagged rows using the colour stored in dat$Color and put the
# label below each of them (pos = 1).
outlierRows <- which(dat$Outlier != "")
points(dat$m[outlierRows], dat$s[outlierRows],
       col = dat$Color[outlierRows], lwd = 8)
text(dat$m[outlierRows], dat$s[outlierRows],
     labels = dat$Outlier[outlierRows],
     pos = 1)
.
> dat
t s m resid Outlier
1 2015-05-01 10:00:00 -0.6264538 -0.8204684 -0.37276065
2 2015-05-01 10:05:00 0.1836433 0.4874291 -0.08680989
3 2015-05-01 10:10:00 -0.8356286 0.7383247 -1.20662950
4 2015-05-01 10:15:00 1.5952808 0.5757814 1.28941997 2015-05-01 10:15:00
5 2015-05-01 10:20:00 0.3295078 -0.3053884 0.37678008
Color
1 black
2 black
3 black
4 red
5 black
>
# ggplot2 variant: draw each point in the colour stored in dat$Color, add the
# fitted line from `reg`, and print dat$Outlier as the label at each point.
library(ggplot2)
ggplot(data = dat) +
  geom_point(mapping = aes(x = m, y = s, colour = Color)) +
  geom_abline(
    intercept = coef(reg)[["(Intercept)"]],
    slope = coef(reg)[["m"]]
  ) +
  geom_text(mapping = aes(x = m, y = s, label = Outlier)) +
  # Use the Color values as literal colours and suppress the legend.
  scale_colour_identity(guide = "none")
我正在运行一个回归，并想突出显示残差值极端的点。
我先运行回归，并把残差作为一列添加到数据框中；然后在 dat$Outlier 列中设置要显示的数据标签，在 dat$Color 列中设置各点的颜色。
基本上，运行这段代码时，我希望把 dat$Outlier 中的文本显示为数据标签，并根据 dat$Color 为各点着色：
time = as.POSIXct(c("2015-05-01 10:00:00","2015-05-01 10:05:00","2015-05-01 10:10:00","2015-05-01 10:15:00","2015-05-01 10:20:00"),"GMT")
# Question setup (continued): regress s on m, store the residuals, and
# precompute a label and a colour for every point whose residual is more than
# one standard deviation (of the residuals) above zero.
s <- rnorm(5)
m <- rnorm(5)
# `time` is the POSIXct vector created by the statement just before this one.
dat <- data.frame(t = time, s = s, m = m)

# s is the response and m the predictor.
reg <- lm(s ~ m, data = dat)
dat$resid <- resid(reg)

# Only large positive residuals are flagged (the comparison is one-sided).
dat$Outlier <- ifelse(dat$resid > sd(dat$resid) * 1, as.character(dat$t), "")
dat$Color <- ifelse(dat$resid > sd(dat$resid) * 1, "red", "black")
dat

# The predictor goes on the x axis and the response on the y axis; otherwise
# the line drawn by abline(reg) does not correspond to the plotted points.
plot(m, s)
abline(reg)
例如,如果我的 dat 数据框如下所示:
t s m resid Outlier Color
1 2015-05-01 10:00:00 -0.7141181 -0.54383561 -0.3645389 black
2 2015-05-01 10:05:00 -1.7444731 0.09249989 -0.4226707 black
3 2015-05-01 10:10:00 -1.1257465 0.12563139 0.2466758 black
4 2015-05-01 10:15:00 0.6201680 -0.47515076 1.0746872 2015-05-01 10:15:00 red
5 2015-05-01 10:20:00 -0.7979108 -0.60000735 -0.5341534 black
如何使第 4 行显示 2015-05-01 10:15:00 作为数据标签并将该点显示为红色?
您可以使用 points() 和 text()：
# Reproducible answer: fit s ~ m, flag the points whose residual exceeds one
# standard deviation of the residuals, then redraw those points in their
# highlight colour and label them with their timestamp.
set.seed(1)
time <- as.POSIXct(
  c(
    "2015-05-01 10:00:00", "2015-05-01 10:05:00", "2015-05-01 10:10:00",
    "2015-05-01 10:15:00", "2015-05-01 10:20:00"
  ),
  tz = "GMT"
)
s <- rnorm(5)
m <- rnorm(5)
dat <- data.frame(t = time, s = s, m = m)

# s is the response and m the predictor.
reg <- lm(s ~ m, data = dat)
dat$resid <- resid(reg)

# Compute the flag once; the comparison is one-sided, so only large positive
# residuals are marked.
is_outlier <- dat$resid > sd(dat$resid) * 1
dat$Outlier <- ifelse(is_outlier, as.character(dat$t), "")
dat$Color <- ifelse(is_outlier, "red", "black")
dat

# The predictor goes on the x axis and the response on the y axis so that the
# line drawn by abline(reg) matches the plotted points.
plot(dat$m, dat$s, xlab = "m", ylab = "s", lwd = 3)
abline(reg)

# Overplot the flagged rows using the colour stored in dat$Color and put the
# label below each of them (pos = 1).
outlierRows <- which(dat$Outlier != "")
points(dat$m[outlierRows], dat$s[outlierRows],
       col = dat$Color[outlierRows], lwd = 8)
text(dat$m[outlierRows], dat$s[outlierRows],
     labels = dat$Outlier[outlierRows],
     pos = 1)
.
> dat
t s m resid Outlier
1 2015-05-01 10:00:00 -0.6264538 -0.8204684 -0.37276065
2 2015-05-01 10:05:00 0.1836433 0.4874291 -0.08680989
3 2015-05-01 10:10:00 -0.8356286 0.7383247 -1.20662950
4 2015-05-01 10:15:00 1.5952808 0.5757814 1.28941997 2015-05-01 10:15:00
5 2015-05-01 10:20:00 0.3295078 -0.3053884 0.37678008
Color
1 black
2 black
3 black
4 red
5 black
>
# ggplot2 variant: draw each point in the colour stored in dat$Color, add the
# fitted line from `reg`, and print dat$Outlier as the label at each point.
library(ggplot2)
ggplot(data = dat) +
  geom_point(mapping = aes(x = m, y = s, colour = Color)) +
  geom_abline(
    intercept = coef(reg)[["(Intercept)"]],
    slope = coef(reg)[["m"]]
  ) +
  geom_text(mapping = aes(x = m, y = s, label = Outlier)) +
  # Use the Color values as literal colours and suppress the legend.
  scale_colour_identity(guide = "none")