R中的多项式拟合和绘制回归线

polynomial fitting and plotting regression line in R

我正在尝试为我的数据拟合 3 次正交多项式。这样做的目的是想在我的数据上可视化不同次数的多项式拟合:3 次和 7 次。我使用的代码与教授在课堂上使用的代码相同,但是我无法得到理想的结果。

# Fit a cubic polynomial of Ozone on Temp and overlay the fitted values on a
# scatter plot of the raw data.

# Polynomial basis for Temp; its first three columns are stored as xo1..xo3.
# NOTE(review): poly()'s documented argument is `degree`; `order=3` is
# presumably picked up through `...` and treated as the degree -- confirm.
orthpoly <- poly(Air_reduced$Temp, order=3)
Air_reduced$xo1 <- orthpoly[,1]
Air_reduced$xo2 <- orthpoly[,2]
Air_reduced$xo3 <- orthpoly[,3]

# Linear model of Ozone on the three basis columns; keep the fitted values
# next to the data so they can be plotted against Temp.
polymodel1 <- lm(Ozone ~ xo1 + xo2 + xo3, data=Air_reduced)
Air_reduced$fitted1 <- fitted(polymodel1)

# Opens the help page for plot(); not part of the analysis itself.
?plot
# Scatter plot of Ozone against Temp (red points) with fixed axis limits.
# ylim = c(0,100) leaves the Ozone values above 100 outside the plot region.
plot(Air_reduced$Temp,Air_reduced$Ozone,xlab="x",ylab="f(x)",
     cex.lab=1.5,cex.axis=1.3,col="red",cex=1.3,
     main="Polynomial of degree 3", xlim = c(50,97), ylim = c(0,100))
# Overlay the fitted values in blue. lines() joins the points in row order and
# the rows are not sorted by Temp here, so the line zigzags back and forth
# instead of tracing a single curve.
lines(Air_reduced$Temp, Air_reduced$fitted1,col="blue",lwd=3)

然而,这会产生一个丑陋的图表。似乎有无数条回归线。

我做错了什么?

数据(`dput()` 的输出;将其赋值给 `Air_reduced` 后即可运行上面的代码):

structure(list(Ozone = c(41L, 36L, 12L, 18L, 23L, 19L, 8L, 16L, 
11L, 14L, 18L, 14L, 34L, 6L, 30L, 11L, 1L, 11L, 4L, 32L, 23L, 
45L, 37L, 29L, 71L, 39L, 23L, 21L, 37L, 20L, 12L, 13L, 49L, 32L, 
64L, 40L, 77L, 97L, 97L, 85L, 10L, 27L, 7L, 48L, 35L, 61L, 79L, 
63L, 16L, 80L, 108L, 20L, 52L, 82L, 50L, 64L, 59L, 39L, 9L, 16L, 
122L, 89L, 110L, 44L, 28L, 65L, 22L, 59L, 23L, 31L, 44L, 21L, 
9L, 45L, 73L, 76L, 118L, 84L, 85L, 96L, 78L, 73L, 91L, 47L, 32L, 
20L, 23L, 21L, 24L, 44L, 21L, 28L, 9L, 13L, 46L, 18L, 13L, 24L, 
16L, 13L, 23L, 36L, 7L, 14L, 30L, 14L, 18L, 20L), Solar.R = c(190L, 
118L, 149L, 313L, 299L, 99L, 19L, 256L, 290L, 274L, 65L, 334L, 
307L, 78L, 322L, 44L, 8L, 320L, 25L, 92L, 13L, 252L, 279L, 127L, 
291L, 323L, 148L, 191L, 284L, 37L, 120L, 137L, 248L, 236L, 175L, 
314L, 276L, 267L, 272L, 175L, 264L, 175L, 48L, 260L, 274L, 285L, 
187L, 220L, 7L, 294L, 223L, 81L, 82L, 213L, 275L, 253L, 254L, 
83L, 24L, 77L, 255L, 229L, 207L, 192L, 273L, 157L, 71L, 51L, 
115L, 244L, 190L, 259L, 36L, 212L, 215L, 203L, 225L, 237L, 188L, 
167L, 197L, 183L, 189L, 95L, 92L, 252L, 220L, 230L, 259L, 236L, 
259L, 238L, 24L, 112L, 237L, 224L, 27L, 238L, 201L, 238L, 14L, 
139L, 49L, 20L, 193L, 191L, 131L, 223L), Wind = c(7.4, 8, 12.6, 
11.5, 8.6, 13.8, 20.1, 9.7, 9.2, 10.9, 13.2, 11.5, 12, 18.4, 
11.5, 9.7, 9.7, 16.6, 9.7, 12, 12, 14.9, 7.4, 9.7, 13.8, 11.5, 
8, 14.9, 20.7, 9.2, 11.5, 10.3, 9.2, 9.2, 4.6, 10.9, 5.1, 6.3, 
5.7, 7.4, 14.3, 14.9, 14.3, 6.9, 10.3, 6.3, 5.1, 11.5, 6.9, 8.6, 
8, 8.6, 12, 7.4, 7.4, 7.4, 9.2, 6.9, 13.8, 7.4, 4, 10.3, 8, 11.5, 
11.5, 9.7, 10.3, 6.3, 7.4, 10.9, 10.3, 15.5, 14.3, 9.7, 8, 9.7, 
2.3, 6.3, 6.3, 6.9, 5.1, 2.8, 4.6, 7.4, 15.5, 10.9, 10.3, 10.9, 
9.7, 14.9, 15.5, 6.3, 10.9, 11.5, 6.9, 13.8, 10.3, 10.3, 8, 12.6, 
9.2, 10.3, 10.3, 16.6, 6.9, 14.3, 8, 11.5), Temp = c(67L, 72L, 
74L, 62L, 65L, 59L, 61L, 69L, 66L, 68L, 58L, 64L, 66L, 57L, 68L, 
62L, 59L, 73L, 61L, 61L, 67L, 81L, 76L, 82L, 90L, 87L, 82L, 77L, 
72L, 65L, 73L, 76L, 85L, 81L, 83L, 83L, 88L, 92L, 92L, 89L, 73L, 
81L, 80L, 81L, 82L, 84L, 87L, 85L, 74L, 86L, 85L, 82L, 86L, 88L, 
86L, 83L, 81L, 81L, 81L, 82L, 89L, 90L, 90L, 86L, 82L, 80L, 77L, 
79L, 76L, 78L, 78L, 77L, 72L, 79L, 86L, 97L, 94L, 96L, 94L, 91L, 
92L, 93L, 93L, 87L, 84L, 80L, 78L, 75L, 73L, 81L, 76L, 77L, 71L, 
71L, 78L, 67L, 76L, 68L, 82L, 64L, 71L, 81L, 69L, 63L, 70L, 75L, 
76L, 68L), Month = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L), Day = c(1L, 2L, 3L, 4L, 7L, 8L, 9L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 28L, 29L, 31L, 7L, 
9L, 10L, 13L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 5L, 6L, 7L, 8L, 
9L, 10L, 12L, 13L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 24L, 25L, 
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 7L, 8L, 9L, 12L, 13L, 
14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 26L, 28L, 29L, 30L, 
31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 
28L, 29L, 30L), ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L, 
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 78L, 
79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L)), .Names = c("Ozone", 
"Solar.R", "Wind", "Temp", "Month", "Day", "ID"), row.names = c(NA, 
-108L), class = c("tbl_df", "tbl", "data.frame"))

绘图前先按 x 轴变量(Temp)对数据排序,图就会正常显示。`lines()` 会按数据行的顺序依次连接各点,因此未排序时拟合线会来回折返,看起来像有很多条线:

# Sort the rows by Temp before plotting: lines() connects points in row order,
# so unsorted rows make the fitted curve zigzag back and forth.
Air_reduced <- Air_reduced[order(Air_reduced$Temp), ]

顺便一提,我建议您尝试用 ggplot2 绘图。它可以拟合简单的模型并一次性绘制所有内容,而且默认设置很智能(默认标签、绘制线条时默认按 x 对点排序……)。在这种情况下,如果您只想绘制一张包含两条多项式拟合曲线的图,只需几行代码:

library(ggplot2)
# Scatter plot of Ozone vs. Temp with two least-squares polynomial fits
# (degree 3 and degree 7) drawn by stat_smooth(). Each fit is mapped to a
# constant colour label so that both appear in a single legend.
ggplot(data = Air_reduced, mapping = aes(x = Temp, y = Ozone)) +
  geom_point(colour = "red") +
  # Cubic fit, labelled "3rd" in the legend.
  stat_smooth(
    mapping = aes(colour = "3rd"),
    formula = y ~ poly(x, 3),
    method = "lm"
  ) +
  # Degree-7 fit, labelled "7th" in the legend.
  stat_smooth(
    mapping = aes(colour = "7th"),
    formula = y ~ poly(x, 7),
    method = "lm"
  ) +
  # Assign the legend title and one colour per fit label.
  scale_colour_manual(
    name = "Polynomial Degree",
    breaks = c("3rd", "7th"),
    values = c("3rd" = "blue", "7th" = "green4")
  )