通过给定点的回归线

regression line passing through a given point

我是编程新手。我想在 R 中绘制一条必须通过给定点 (X, Y) 的回归线。例如,我想为下面的示例数据绘制一条穿过 (41, 2900) 的线性回归线。参考 this link,我做了如下尝试,但结果与预期不同:红色是普通的线性模型,蓝色本应是我想要的约束模型,却与预期不符。有人可以帮助我吗?

library(ggplot2)
# Asker's attempt: scatter plot with the ordinary fit (red), guide lines at
# x = 41 and y = 2900, and a second fit intended to be constrained (blue).
ggplot(sampledata, aes(x, y)) +
  geom_point(size = 2) +
  labs(x = "X", y = "Y") +
  geom_smooth(method = "lm", se = FALSE, colour = "red", size = 2) +
  geom_hline(yintercept = 2900) +
  geom_vline(xintercept = 41) +
  # The intercept is still estimated in this formula, so the fit is not
  # forced through (41, 2900).
  geom_smooth(
    method = "lm",
    formula = I(y - 2900) ~ I(x - 41),
    se = FALSE,
    colour = "blue",
    size = 2
  )

数据

# Example data set: 86 paired observations with numeric columns `x` and `y`,
# stored as a data frame that also carries the tibble classes.
sampledata <- structure(list(x = c(253.84, 210.28, 192.81, 187.1, 181.43, 176.31, 
                               172.62, 169.08, 164.53, 162.01, 160.19, 158.08, 156.43, 154.98, 
                               153.16, 151.35, 149.29, 147.58, 144.98, 143.57, 142.45, 140.93, 
                               140.08, 138.88, 137.79, 136.77, 136.05, 135.07, 134.35, 133.25, 
                               132.5, 131.51, 130.68, 130.11, 128.84, 127.85, 126.28, 125.38, 
                               124.49, 123.48, 122.32, 121.56, 120.47, 119.66, 118.68, 117.67, 
                               116.51, 115.53, 114.62, 113.45, 112.35, 111.02, 109.02, 107.66, 
                               106.95, 104.84, 103.8, 102.44, 101.33, 100.36, 98.96, 97.18, 
                               95.79, 93.81, 91, 87.4, 84.43, 82.56, 79.53, 76.02, 70.97, 67.24, 
                               63.71, 60.69, 58.37, 56.21, 53.74, 52.1, 50.29, 48.37, 46.87, 
                               45.57, 43.92, 43.15, 42.13, 41.64), y = c(1056, 1140, 1200, 1308, 
                                                                         1188, 1368, 1380, 1536, 1608, 1440, 1428, 1716, 1632, 1692, 1680, 
                                                                         1512, 1440, 1632, 1716, 1656, 1584, 1872, 1812, 1884, 2004, 1692, 
                                                                         2100, 1740, 1980, 1944, 2100, 1896, 1956, 1692, 1812, 1920, 2076, 
                                                                         2148, 2076, 1908, 2028, 2268, 1944, 2052, 2196, 2184, 2436, 2124, 
                                                                         2076, 2040, 1980, 2280, 2232, 2340, 2328, 2292, 2232, 2184, 2580, 
                                                                         2424, 2304, 2364, 2460, 2592, 2568, 2436, 2616, 2796, 2640, 2808, 
                                                                         2988, 2808, 1884, 2844, 2856, 2868, 3132, 3108, 2880, 2856, 2916, 
                                                                         2892, 2772, 2844, 2784, 2664)), class = c("tbl_df", "tbl", "data.frame"
                                                                         ), row.names = c(NA, -86L))

问题在于误以为 y 坐标应当平移的量就是直线必须经过的那个点的纵坐标。实际上,应当平移的量是 x = 41 处的预测 y 值与该点纵坐标之间的差值。

# Point the shifted line has to pass through.
x0 <- 41
y0 <- 2900

# Ordinary least-squares fit of y on x.
fit <- lm(y ~ x, data = sampledata)

# Vertical gap between the fitted value at x0 and the target ordinate y0.
yshift <- predict(fit, newdata = data.frame(x = x0)) - y0
yshift
#      1 
#84.0286

所以现在在绘图代码中使用这些值:x0、y0 和 yshift。

library(ggplot2)

# Scatter plot with the unconstrained fit (red) and the same fit shifted
# vertically by `yshift` (blue), plus dashed guides and a marker at the
# target point. Relies on `x0`, `y0` and `yshift` computed beforehand.
ggplot(data = sampledata, mapping = aes(x, y)) +
  geom_point(size = 1) +
  geom_vline(xintercept = x0, linetype = "dashed") +
  geom_hline(yintercept = y0, linetype = "dashed") +
  # annotate() draws the target marker exactly once; geom_point() with
  # constant x/y would redraw it for every row of the inherited data.
  annotate("point", x = x0, y = y0, size = 3, shape = 3, colour = "blue") +
  geom_smooth(method = "lm",
              formula = y ~ x,
              size = 1, colour = "red", se = FALSE) +
  # Subtracting yshift from the response lowers the fitted line by exactly
  # the gap between the prediction at x0 and y0.
  geom_smooth(method = "lm",
              formula = I(y - yshift) ~ x,
              size = 1, colour = "blue", se = FALSE) +
  xlab("X") +
  ylab("Y")