在 geom_line 上显示数据值
Display data values on geom_line
我使用 ggplot 和 geom_line 创建了一个图,并希望在图上显示各个数据值(数字)。
到目前为止,这是我的代码:
# Asker's attempt on the wide data frame `errors_prob_3`: one point layer and
# one line layer per statistic column, all sharing x = ID. No value labels are
# drawn yet.
#
# Inside aes() a colour string is a series key, not a literal colour, so each
# layer is keyed by its column name. The manual scale below maps those keys to
# the intended colours, and `breaks` pins every legend label to its own key
# (positional labels alone would be matched to the alphabetically sorted keys).
ggplot(errors_prob_3, aes(x = ID)) +
  geom_point(aes(y = train_mean, colour = "train_mean")) +
  geom_line(aes(y = train_mean, colour = "train_mean")) +
  geom_point(aes(y = train_sd, colour = "train_sd")) +
  geom_line(aes(y = train_sd, colour = "train_sd")) +
  geom_point(aes(y = test_mean, colour = "test_mean")) +
  geom_line(aes(y = test_mean, colour = "test_mean")) +
  geom_point(aes(y = test_sd, colour = "test_sd")) +
  geom_line(aes(y = test_sd, colour = "test_sd")) +
  labs(
    x = "Number of predictors", y = "Mean/sd value",
    title = "Plot of the training and test cv error"
  ) +
  scale_color_manual(
    name = "Legend",
    values = c(
      train_mean = "red", train_sd = "blue",
      test_mean = "green", test_sd = "purple"
    ),
    breaks = c("train_mean", "train_sd", "test_mean", "test_sd"),
    labels = c("Train mean", "Train sd", "Test mean", "Test sd")
  )
我得到一个错误:
Aesthetics must be either length 1 or the same as the data (6): label.
errors_prob_3 是一个包含 6 个观测值(行)和 5 个变量(列)的数据框。
我的数据如下:
# Example data: cross-validation error summaries for models with 1 to 6
# predictors (one row per model size). Bound to `errors_prob_3`, the name the
# plotting snippets refer to, so the example can be run as-is.
errors_prob_3 <- data.frame(
  ID = 1:6,
  train_mean = c(
    9.62056569110574, 9.16711345988648, 7.6586555534977,
    7.62016661214069, 7.56713995100505, 7.30900730515059
  ),
  train_sd = c(
    0.986375934334889, 1.00056316593354, 0.710802251192404,
    0.691415430116925, 0.687478712185106, 0.726304511632279
  ),
  test_mean = c(
    10.3738135132565, 10.0369765520266, 8.14168965211194,
    8.40469077569391, 8.23953878513202, 8.0334726272832
  ),
  test_sd = c(
    3.78543823754458, 3.6662904556102, 2.89196938350374,
    2.74691446446284, 2.76568576521599, 2.98708614111826
  )
)
我目前得到的图:
任何帮助,将不胜感激。谢谢!
主要的技巧是把数据从宽格式重塑(reshape)为长格式。这样一来,一切都会变得更简单、更直观。
library(tidyverse)
library(ggplot2)
# Reshape the wide table (one column per statistic) into long format: one row
# per (ID, statistic) pair. A single colour mapping then yields one line, one
# set of points and one set of text labels per statistic.
errors_prob_3 %>%
  pivot_longer(cols = -ID, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = ID, y = value, colour = variable)) +
  geom_line() +
  geom_point() +
  # Print every value, rounded to one decimal, at its point. "inward"
  # justification aligns the text towards the centre of the plot, and
  # show.legend = FALSE keeps the text glyph out of the legend keys.
  geom_text(
    aes(label = round(value, 1)),
    vjust = "inward", hjust = "inward",
    show.legend = FALSE
  ) +
  labs(
    x = "Number of predictors", y = "Mean/sd value",
    title = "Plot of the training and test cv error"
  ) +
  # `breaks` ties each label to its series key. Without it the labels are
  # assigned by position to the alphabetically sorted keys (test_mean,
  # test_sd, train_mean, train_sd), which puts "Train mean" on the test_mean
  # series.
  scale_color_discrete(
    name = "Legend",
    breaks = c("train_mean", "train_sd", "test_mean", "test_sd"),
    labels = c("Train mean", "Train sd", "Test mean", "Test sd")
  )
编辑
如果只想显示第一个或最后一个文本标签,请先对数据取子集。在本例中,我只显示每条线的最后一个标签。
# Same long-format plot, but only the last point of each series (largest ID)
# gets a text label.
errors_prob_3 %>%
  pivot_longer(cols = -ID, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = ID, y = value, colour = variable)) +
  geom_line() +
  geom_point() +
  geom_text(
    # A function given as layer `data` is called with the plot data; keep only
    # the rows at the maximum ID so a single label is drawn per series.
    data = function(d) filter(d, ID == max(ID)),
    aes(label = round(value, 1)),
    vjust = "inward", hjust = "inward",
    show.legend = FALSE
  ) +
  labs(
    x = "Number of predictors", y = "Mean/sd value",
    title = "Plot of the training and test cv error"
  ) +
  # `breaks` ties each label to its series key. Without it the labels are
  # assigned by position to the alphabetically sorted keys (test_mean,
  # test_sd, train_mean, train_sd), which puts "Train mean" on the test_mean
  # series.
  scale_color_discrete(
    name = "Legend",
    breaks = c("train_mean", "train_sd", "test_mean", "test_sd"),
    labels = c("Train mean", "Train sd", "Test mean", "Test sd")
  )
我使用 ggplot 和 geom_line 创建了一个图,并希望在图上显示各个数据值(数字)。
到目前为止,这是我的代码:
# Asker's attempt on the wide data frame `errors_prob_3`: one point layer and
# one line layer per statistic column, all sharing x = ID. No value labels are
# drawn yet.
#
# Inside aes() a colour string is a series key, not a literal colour, so each
# layer is keyed by its column name. The manual scale below maps those keys to
# the intended colours, and `breaks` pins every legend label to its own key
# (positional labels alone would be matched to the alphabetically sorted keys).
ggplot(errors_prob_3, aes(x = ID)) +
  geom_point(aes(y = train_mean, colour = "train_mean")) +
  geom_line(aes(y = train_mean, colour = "train_mean")) +
  geom_point(aes(y = train_sd, colour = "train_sd")) +
  geom_line(aes(y = train_sd, colour = "train_sd")) +
  geom_point(aes(y = test_mean, colour = "test_mean")) +
  geom_line(aes(y = test_mean, colour = "test_mean")) +
  geom_point(aes(y = test_sd, colour = "test_sd")) +
  geom_line(aes(y = test_sd, colour = "test_sd")) +
  labs(
    x = "Number of predictors", y = "Mean/sd value",
    title = "Plot of the training and test cv error"
  ) +
  scale_color_manual(
    name = "Legend",
    values = c(
      train_mean = "red", train_sd = "blue",
      test_mean = "green", test_sd = "purple"
    ),
    breaks = c("train_mean", "train_sd", "test_mean", "test_sd"),
    labels = c("Train mean", "Train sd", "Test mean", "Test sd")
  )
我得到一个错误:
Aesthetics must be either length 1 or the same as the data (6): label.
errors_prob_3 是一个包含 6 个观测值(行)和 5 个变量(列)的数据框。
我的数据如下:
# Example data: cross-validation error summaries for models with 1 to 6
# predictors (one row per model size). Bound to `errors_prob_3`, the name the
# plotting snippets refer to, so the example can be run as-is.
errors_prob_3 <- data.frame(
  ID = 1:6,
  train_mean = c(
    9.62056569110574, 9.16711345988648, 7.6586555534977,
    7.62016661214069, 7.56713995100505, 7.30900730515059
  ),
  train_sd = c(
    0.986375934334889, 1.00056316593354, 0.710802251192404,
    0.691415430116925, 0.687478712185106, 0.726304511632279
  ),
  test_mean = c(
    10.3738135132565, 10.0369765520266, 8.14168965211194,
    8.40469077569391, 8.23953878513202, 8.0334726272832
  ),
  test_sd = c(
    3.78543823754458, 3.6662904556102, 2.89196938350374,
    2.74691446446284, 2.76568576521599, 2.98708614111826
  )
)
我目前得到的图:
主要的技巧是把数据从宽格式重塑(reshape)为长格式。这样一来,一切都会变得更简单、更直观。
library(tidyverse)
library(ggplot2)
# Reshape the wide table (one column per statistic) into long format: one row
# per (ID, statistic) pair. A single colour mapping then yields one line, one
# set of points and one set of text labels per statistic.
errors_prob_3 %>%
  pivot_longer(cols = -ID, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = ID, y = value, colour = variable)) +
  geom_line() +
  geom_point() +
  # Print every value, rounded to one decimal, at its point. "inward"
  # justification aligns the text towards the centre of the plot, and
  # show.legend = FALSE keeps the text glyph out of the legend keys.
  geom_text(
    aes(label = round(value, 1)),
    vjust = "inward", hjust = "inward",
    show.legend = FALSE
  ) +
  labs(
    x = "Number of predictors", y = "Mean/sd value",
    title = "Plot of the training and test cv error"
  ) +
  # `breaks` ties each label to its series key. Without it the labels are
  # assigned by position to the alphabetically sorted keys (test_mean,
  # test_sd, train_mean, train_sd), which puts "Train mean" on the test_mean
  # series.
  scale_color_discrete(
    name = "Legend",
    breaks = c("train_mean", "train_sd", "test_mean", "test_sd"),
    labels = c("Train mean", "Train sd", "Test mean", "Test sd")
  )
编辑
如果只想显示第一个或最后一个文本标签,请先对数据取子集。在本例中,我只显示每条线的最后一个标签。
# Same long-format plot, but only the last point of each series (largest ID)
# gets a text label.
errors_prob_3 %>%
  pivot_longer(cols = -ID, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = ID, y = value, colour = variable)) +
  geom_line() +
  geom_point() +
  geom_text(
    # A function given as layer `data` is called with the plot data; keep only
    # the rows at the maximum ID so a single label is drawn per series.
    data = function(d) filter(d, ID == max(ID)),
    aes(label = round(value, 1)),
    vjust = "inward", hjust = "inward",
    show.legend = FALSE
  ) +
  labs(
    x = "Number of predictors", y = "Mean/sd value",
    title = "Plot of the training and test cv error"
  ) +
  # `breaks` ties each label to its series key. Without it the labels are
  # assigned by position to the alphabetically sorted keys (test_mean,
  # test_sd, train_mean, train_sd), which puts "Train mean" on the test_mean
  # series.
  scale_color_discrete(
    name = "Legend",
    breaks = c("train_mean", "train_sd", "test_mean", "test_sd"),
    labels = c("Train mean", "Train sd", "Test mean", "Test sd")
  )