R中的多项式拟合和绘制回归线
polynomial fitting and plotting regression line in R
我正在尝试为我的数据找到 3 次正交多项式。这样做的目的是我想在我的数据上可视化不同的多项式拟合:3 次和 7 次。我使用的代码与 class 中的教授相同,但是我无法获得很好的结果。
orthpoly <- poly(Air_reduced$Temp, order=3)
Air_reduced$xo1 <- orthpoly[,1]
Air_reduced$xo2 <- orthpoly[,2]
Air_reduced$xo3 <- orthpoly[,3]
polymodel1 <- lm(Ozone ~ xo1 + xo2 + xo3, data=Air_reduced)
Air_reduced$fitted1 <- fitted(polymodel1)
?plot
plot(Air_reduced$Temp,Air_reduced$Ozone,xlab="x",ylab="f(x)",
cex.lab=1.5,cex.axis=1.3,col="red",cex=1.3,
main="Polynomial of degree 3", xlim = c(50,97), ylim = c(0,100))
lines(Air_reduced$Temp, Air_reduced$fitted1,col="blue",lwd=3)
然而,这会产生一个丑陋的图表。似乎有无数条回归线。
我做错了什么?
数据:
structure(list(Ozone = c(41L, 36L, 12L, 18L, 23L, 19L, 8L, 16L,
11L, 14L, 18L, 14L, 34L, 6L, 30L, 11L, 1L, 11L, 4L, 32L, 23L,
45L, 37L, 29L, 71L, 39L, 23L, 21L, 37L, 20L, 12L, 13L, 49L, 32L,
64L, 40L, 77L, 97L, 97L, 85L, 10L, 27L, 7L, 48L, 35L, 61L, 79L,
63L, 16L, 80L, 108L, 20L, 52L, 82L, 50L, 64L, 59L, 39L, 9L, 16L,
122L, 89L, 110L, 44L, 28L, 65L, 22L, 59L, 23L, 31L, 44L, 21L,
9L, 45L, 73L, 76L, 118L, 84L, 85L, 96L, 78L, 73L, 91L, 47L, 32L,
20L, 23L, 21L, 24L, 44L, 21L, 28L, 9L, 13L, 46L, 18L, 13L, 24L,
16L, 13L, 23L, 36L, 7L, 14L, 30L, 14L, 18L, 20L), Solar.R = c(190L,
118L, 149L, 313L, 299L, 99L, 19L, 256L, 290L, 274L, 65L, 334L,
307L, 78L, 322L, 44L, 8L, 320L, 25L, 92L, 13L, 252L, 279L, 127L,
291L, 323L, 148L, 191L, 284L, 37L, 120L, 137L, 248L, 236L, 175L,
314L, 276L, 267L, 272L, 175L, 264L, 175L, 48L, 260L, 274L, 285L,
187L, 220L, 7L, 294L, 223L, 81L, 82L, 213L, 275L, 253L, 254L,
83L, 24L, 77L, 255L, 229L, 207L, 192L, 273L, 157L, 71L, 51L,
115L, 244L, 190L, 259L, 36L, 212L, 215L, 203L, 225L, 237L, 188L,
167L, 197L, 183L, 189L, 95L, 92L, 252L, 220L, 230L, 259L, 236L,
259L, 238L, 24L, 112L, 237L, 224L, 27L, 238L, 201L, 238L, 14L,
139L, 49L, 20L, 193L, 191L, 131L, 223L), Wind = c(7.4, 8, 12.6,
11.5, 8.6, 13.8, 20.1, 9.7, 9.2, 10.9, 13.2, 11.5, 12, 18.4,
11.5, 9.7, 9.7, 16.6, 9.7, 12, 12, 14.9, 7.4, 9.7, 13.8, 11.5,
8, 14.9, 20.7, 9.2, 11.5, 10.3, 9.2, 9.2, 4.6, 10.9, 5.1, 6.3,
5.7, 7.4, 14.3, 14.9, 14.3, 6.9, 10.3, 6.3, 5.1, 11.5, 6.9, 8.6,
8, 8.6, 12, 7.4, 7.4, 7.4, 9.2, 6.9, 13.8, 7.4, 4, 10.3, 8, 11.5,
11.5, 9.7, 10.3, 6.3, 7.4, 10.9, 10.3, 15.5, 14.3, 9.7, 8, 9.7,
2.3, 6.3, 6.3, 6.9, 5.1, 2.8, 4.6, 7.4, 15.5, 10.9, 10.3, 10.9,
9.7, 14.9, 15.5, 6.3, 10.9, 11.5, 6.9, 13.8, 10.3, 10.3, 8, 12.6,
9.2, 10.3, 10.3, 16.6, 6.9, 14.3, 8, 11.5), Temp = c(67L, 72L,
74L, 62L, 65L, 59L, 61L, 69L, 66L, 68L, 58L, 64L, 66L, 57L, 68L,
62L, 59L, 73L, 61L, 61L, 67L, 81L, 76L, 82L, 90L, 87L, 82L, 77L,
72L, 65L, 73L, 76L, 85L, 81L, 83L, 83L, 88L, 92L, 92L, 89L, 73L,
81L, 80L, 81L, 82L, 84L, 87L, 85L, 74L, 86L, 85L, 82L, 86L, 88L,
86L, 83L, 81L, 81L, 81L, 82L, 89L, 90L, 90L, 86L, 82L, 80L, 77L,
79L, 76L, 78L, 78L, 77L, 72L, 79L, 86L, 97L, 94L, 96L, 94L, 91L,
92L, 93L, 93L, 87L, 84L, 80L, 78L, 75L, 73L, 81L, 76L, 77L, 71L,
71L, 78L, 67L, 76L, 68L, 82L, 64L, 71L, 81L, 69L, 63L, 70L, 75L,
76L, 68L), Month = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L), Day = c(1L, 2L, 3L, 4L, 7L, 8L, 9L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 28L, 29L, 31L, 7L,
9L, 10L, 13L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 5L, 6L, 7L, 8L,
9L, 10L, 12L, 13L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 7L, 8L, 9L, 12L, 13L,
14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 26L, 28L, 29L, 30L,
31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
28L, 29L, 30L), ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 78L,
79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L)), .Names = c("Ozone",
"Solar.R", "Wind", "Temp", "Month", "Day", "ID"), row.names = c(NA,
-108L), class = c("tbl_df", "tbl", "data.frame"))
绘图前按 x 轴排列数据,绘图会很漂亮:
Air_reduced = Air_reduced[order(Air_reduced$Temp), ]
作为旁注,我鼓励您尝试 ggplot2
进行绘图。它可以适合简单的模型并一次绘制所有内容,并且它对默认设置很聪明(默认标签,绘制线条时默认对点进行排序......)。在这种情况下,如果您只想绘制包含两个多项式的图,只需几行代码:
library(ggplot2)
ggplot(Air_reduced, aes(x = Temp, y = Ozone)) +
geom_point(color = "red") +
stat_smooth(method = "lm",
formula = y ~ poly(x, 3),
aes(color = "3rd")) +
stat_smooth(method = "lm",
formula = y ~ poly(x, 7),
aes(color = "7th")) +
scale_color_manual(
name = "Polynomial Degree",
breaks = c("3rd", "7th"),
values = c("blue", "green4")
)
我正在尝试为我的数据找到 3 次正交多项式。这样做的目的是我想在我的数据上可视化不同的多项式拟合:3 次和 7 次。我使用的代码与 class 中的教授相同,但是我无法获得很好的结果。
orthpoly <- poly(Air_reduced$Temp, order=3)
Air_reduced$xo1 <- orthpoly[,1]
Air_reduced$xo2 <- orthpoly[,2]
Air_reduced$xo3 <- orthpoly[,3]
polymodel1 <- lm(Ozone ~ xo1 + xo2 + xo3, data=Air_reduced)
Air_reduced$fitted1 <- fitted(polymodel1)
?plot
plot(Air_reduced$Temp,Air_reduced$Ozone,xlab="x",ylab="f(x)",
cex.lab=1.5,cex.axis=1.3,col="red",cex=1.3,
main="Polynomial of degree 3", xlim = c(50,97), ylim = c(0,100))
lines(Air_reduced$Temp, Air_reduced$fitted1,col="blue",lwd=3)
然而,这会产生一个丑陋的图表。似乎有无数条回归线。
我做错了什么?
数据:
structure(list(Ozone = c(41L, 36L, 12L, 18L, 23L, 19L, 8L, 16L,
11L, 14L, 18L, 14L, 34L, 6L, 30L, 11L, 1L, 11L, 4L, 32L, 23L,
45L, 37L, 29L, 71L, 39L, 23L, 21L, 37L, 20L, 12L, 13L, 49L, 32L,
64L, 40L, 77L, 97L, 97L, 85L, 10L, 27L, 7L, 48L, 35L, 61L, 79L,
63L, 16L, 80L, 108L, 20L, 52L, 82L, 50L, 64L, 59L, 39L, 9L, 16L,
122L, 89L, 110L, 44L, 28L, 65L, 22L, 59L, 23L, 31L, 44L, 21L,
9L, 45L, 73L, 76L, 118L, 84L, 85L, 96L, 78L, 73L, 91L, 47L, 32L,
20L, 23L, 21L, 24L, 44L, 21L, 28L, 9L, 13L, 46L, 18L, 13L, 24L,
16L, 13L, 23L, 36L, 7L, 14L, 30L, 14L, 18L, 20L), Solar.R = c(190L,
118L, 149L, 313L, 299L, 99L, 19L, 256L, 290L, 274L, 65L, 334L,
307L, 78L, 322L, 44L, 8L, 320L, 25L, 92L, 13L, 252L, 279L, 127L,
291L, 323L, 148L, 191L, 284L, 37L, 120L, 137L, 248L, 236L, 175L,
314L, 276L, 267L, 272L, 175L, 264L, 175L, 48L, 260L, 274L, 285L,
187L, 220L, 7L, 294L, 223L, 81L, 82L, 213L, 275L, 253L, 254L,
83L, 24L, 77L, 255L, 229L, 207L, 192L, 273L, 157L, 71L, 51L,
115L, 244L, 190L, 259L, 36L, 212L, 215L, 203L, 225L, 237L, 188L,
167L, 197L, 183L, 189L, 95L, 92L, 252L, 220L, 230L, 259L, 236L,
259L, 238L, 24L, 112L, 237L, 224L, 27L, 238L, 201L, 238L, 14L,
139L, 49L, 20L, 193L, 191L, 131L, 223L), Wind = c(7.4, 8, 12.6,
11.5, 8.6, 13.8, 20.1, 9.7, 9.2, 10.9, 13.2, 11.5, 12, 18.4,
11.5, 9.7, 9.7, 16.6, 9.7, 12, 12, 14.9, 7.4, 9.7, 13.8, 11.5,
8, 14.9, 20.7, 9.2, 11.5, 10.3, 9.2, 9.2, 4.6, 10.9, 5.1, 6.3,
5.7, 7.4, 14.3, 14.9, 14.3, 6.9, 10.3, 6.3, 5.1, 11.5, 6.9, 8.6,
8, 8.6, 12, 7.4, 7.4, 7.4, 9.2, 6.9, 13.8, 7.4, 4, 10.3, 8, 11.5,
11.5, 9.7, 10.3, 6.3, 7.4, 10.9, 10.3, 15.5, 14.3, 9.7, 8, 9.7,
2.3, 6.3, 6.3, 6.9, 5.1, 2.8, 4.6, 7.4, 15.5, 10.9, 10.3, 10.9,
9.7, 14.9, 15.5, 6.3, 10.9, 11.5, 6.9, 13.8, 10.3, 10.3, 8, 12.6,
9.2, 10.3, 10.3, 16.6, 6.9, 14.3, 8, 11.5), Temp = c(67L, 72L,
74L, 62L, 65L, 59L, 61L, 69L, 66L, 68L, 58L, 64L, 66L, 57L, 68L,
62L, 59L, 73L, 61L, 61L, 67L, 81L, 76L, 82L, 90L, 87L, 82L, 77L,
72L, 65L, 73L, 76L, 85L, 81L, 83L, 83L, 88L, 92L, 92L, 89L, 73L,
81L, 80L, 81L, 82L, 84L, 87L, 85L, 74L, 86L, 85L, 82L, 86L, 88L,
86L, 83L, 81L, 81L, 81L, 82L, 89L, 90L, 90L, 86L, 82L, 80L, 77L,
79L, 76L, 78L, 78L, 77L, 72L, 79L, 86L, 97L, 94L, 96L, 94L, 91L,
92L, 93L, 93L, 87L, 84L, 80L, 78L, 75L, 73L, 81L, 76L, 77L, 71L,
71L, 78L, 67L, 76L, 68L, 82L, 64L, 71L, 81L, 69L, 63L, 70L, 75L,
76L, 68L), Month = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L), Day = c(1L, 2L, 3L, 4L, 7L, 8L, 9L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 28L, 29L, 31L, 7L,
9L, 10L, 13L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 5L, 6L, 7L, 8L,
9L, 10L, 12L, 13L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 7L, 8L, 9L, 12L, 13L,
14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 26L, 28L, 29L, 30L,
31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
28L, 29L, 30L), ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 78L,
79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L)), .Names = c("Ozone",
"Solar.R", "Wind", "Temp", "Month", "Day", "ID"), row.names = c(NA,
-108L), class = c("tbl_df", "tbl", "data.frame"))
绘图前按 x 轴排列数据,绘图会很漂亮:
Air_reduced = Air_reduced[order(Air_reduced$Temp), ]
作为旁注,我鼓励您尝试 ggplot2
进行绘图。它可以适合简单的模型并一次绘制所有内容,并且它对默认设置很聪明(默认标签,绘制线条时默认对点进行排序......)。在这种情况下,如果您只想绘制包含两个多项式的图,只需几行代码:
library(ggplot2)
ggplot(Air_reduced, aes(x = Temp, y = Ozone)) +
geom_point(color = "red") +
stat_smooth(method = "lm",
formula = y ~ poly(x, 3),
aes(color = "3rd")) +
stat_smooth(method = "lm",
formula = y ~ poly(x, 7),
aes(color = "7th")) +
scale_color_manual(
name = "Polynomial Degree",
breaks = c("3rd", "7th"),
values = c("blue", "green4")
)