Geom_area ggplot 中的顺序
Geom_area order in ggplot
我有一份样本队列(cohort)数据,使用 ggplot() 绘图时,它既没有正确地按颜色编码,
也没有按正确的时间顺序显示。以下代码用于生成绘图:
library(ggplot2)

# Palette generator interpolating from light blue to dark blue.
blues <- colorRampPalette(c("lightblue", "darkblue"))

# Base plot: users per month, with one group per cohort.
p <- ggplot(cohort.chart, aes(x = month, y = users, group = cohort))

# Area layer filled by cohort, 12 manual fill colours, a centred title and
# x-axis labels rotated by 45 degrees.
p +
  geom_area(aes(fill = cohort)) +
  scale_fill_manual(values = blues(12)) +
  ggtitle("Users by cohort") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
正如你在图例中看到的那样,月份没有按时间顺序排列,这一点也反映在图中。我尝试用 as.yearmon()
将 cohort 和 month 都转换为 yearmon 对象,但这会产生无法使用连续变量的错误。
如何在 ggplot() 中解决这个问题?
这是数据
> dput(cohort.chart)
structure(list(cohort = structure(c(11L, 10L, 3L, 5L, 4L, 8L,
1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L,
6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L,
11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L,
3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L,
8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L,
7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L,
12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L,
10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L,
4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L,
9L, 7L, 6L, 2L, 12L), .Label = c("Apr 2017", "Aug 2017", "Dec 2016",
"Feb 2017", "Jan 2017", "Jul 2017", "Jun 2017", "Mar 2017", "May 2017",
"Nov 2016", "Oct 2016", "Sep 2017"), class = "factor"), month = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Oct 2016", "Nov 2016",
"Dec 2016", "Jan 2017", "Feb 2017", "Mar 2017", "Apr 2017", "May 2017",
"Jun 2017", "Jul 2017", "Aug 2017", "Sep 2017"), class = "factor"),
users = c(795, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 672, 92,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 526, 48, 26, 0, 0, 0, 0, 0,
0, 0, 0, 0, 506, 37, 14, 26, 0, 0, 0, 0, 0, 0, 0, 0, 514,
46, 18, 19, 37, 0, 0, 0, 0, 0, 0, 0, 532, 47, 16, 18, 22,
57, 0, 0, 0, 0, 0, 0, 589, 55, 15, 20, 24, 39, 88, 0, 0,
0, 0, 0, 548, 53, 18, 21, 25, 39, 62, 172, 0, 0, 0, 0, 559,
53, 20, 20, 23, 36, 61, 139, 133, 0, 0, 0, 538, 58, 17, 18,
22, 39, 54, 130, 99, 109, 0, 0, 519, 45, 16, 19, 26, 39,
50, 125, 96, 86, 115, 0, 530, 54, 18, 20, 23, 37, 51, 129,
88, 75, 85, 126)), row.names = c(NA, -144L), .Names = c("cohort",
"month", "users"), class = "data.frame")
绘图应该类似于以下内容:
根据您提供的数据框,您的 month 列的因子水平(factor levels)顺序是正确的,
但 cohort 列的因子水平顺序不正确,因此请将 cohort 列的因子水平改为与 month 列相同。
library(ggplot2)

# Palette generator interpolating from light blue to dark blue.
blues <- colorRampPalette(c('lightblue', 'darkblue'))

# Change the factor level: reuse the level order of `month` for `cohort`, so
# the legend and fill colours follow the same order as the x axis instead of
# the alphabetical order the `cohort` factor was created with.
cohort.chart$cohort <- factor(cohort.chart$cohort,
                              levels = levels(cohort.chart$month))

# One colour per cohort level rather than a hard-coded 12, so the manual scale
# still receives enough values if the number of cohorts changes.
n_cohorts <- nlevels(cohort.chart$cohort)

p <- ggplot(cohort.chart, aes(x = month, y = users, group = cohort))
p + geom_area(aes(fill = cohort)) +
  scale_fill_manual(values = blues(n_cohorts)) +
  ggtitle('Users by cohort') +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
当您创建数据框时,R 会(自动)将字符串转换为因子,并按字母顺序为每个不同的 cohort
日期创建一个因子水平。
cohort.chart$cohort
[1]... 12 Levels: Apr 2017 Aug 2017 Dec 2016 Feb 2017 Jan 2017 Jul 2017 Jun 2017 Mar 2017 May 2017 ... Sep 2017
你可以使用 as.yearmon(来自 zoo 包)
来解决这个问题,只需要在之后再转换回因子即可。
# as.yearmon() is provided by the zoo package, so load it explicitly.
library(zoo)

# Parse the "Mon YYYY" labels into yearmon values, then convert back to a
# factor because the plot needs a discrete variable for `fill`.
# NOTE(review): this relies on as.factor() ordering the levels by the
# underlying yearmon values (i.e. chronologically) - confirm with
# levels(cohort.chart$cohort) after running.
cohort.chart$cohort <- as.yearmon(cohort.chart$cohort)
cohort.chart$cohort <- as.factor(cohort.chart$cohort)
我有一份样本队列(cohort)数据,使用 ggplot() 绘图时,它既没有正确地按颜色编码,
也没有按正确的时间顺序显示。以下代码用于生成绘图:
library(ggplot2)

# Palette generator interpolating from light blue to dark blue.
blues <- colorRampPalette(c("lightblue", "darkblue"))

# Base plot: users per month, with one group per cohort.
p <- ggplot(cohort.chart, aes(x = month, y = users, group = cohort))

# Area layer filled by cohort, 12 manual fill colours, a centred title and
# x-axis labels rotated by 45 degrees.
p +
  geom_area(aes(fill = cohort)) +
  scale_fill_manual(values = blues(12)) +
  ggtitle("Users by cohort") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
正如你在图例中看到的那样,月份没有按时间顺序排列,这一点也反映在图中。我尝试用 as.yearmon()
将 cohort 和 month 都转换为 yearmon 对象,但这会产生无法使用连续变量的错误。
如何在 ggplot() 中解决这个问题?
这是数据
> dput(cohort.chart)
structure(list(cohort = structure(c(11L, 10L, 3L, 5L, 4L, 8L,
1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L,
6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L,
11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L,
3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L,
8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L,
7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L,
12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L,
10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L,
4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L,
9L, 7L, 6L, 2L, 12L), .Label = c("Apr 2017", "Aug 2017", "Dec 2016",
"Feb 2017", "Jan 2017", "Jul 2017", "Jun 2017", "Mar 2017", "May 2017",
"Nov 2016", "Oct 2016", "Sep 2017"), class = "factor"), month = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Oct 2016", "Nov 2016",
"Dec 2016", "Jan 2017", "Feb 2017", "Mar 2017", "Apr 2017", "May 2017",
"Jun 2017", "Jul 2017", "Aug 2017", "Sep 2017"), class = "factor"),
users = c(795, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 672, 92,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 526, 48, 26, 0, 0, 0, 0, 0,
0, 0, 0, 0, 506, 37, 14, 26, 0, 0, 0, 0, 0, 0, 0, 0, 514,
46, 18, 19, 37, 0, 0, 0, 0, 0, 0, 0, 532, 47, 16, 18, 22,
57, 0, 0, 0, 0, 0, 0, 589, 55, 15, 20, 24, 39, 88, 0, 0,
0, 0, 0, 548, 53, 18, 21, 25, 39, 62, 172, 0, 0, 0, 0, 559,
53, 20, 20, 23, 36, 61, 139, 133, 0, 0, 0, 538, 58, 17, 18,
22, 39, 54, 130, 99, 109, 0, 0, 519, 45, 16, 19, 26, 39,
50, 125, 96, 86, 115, 0, 530, 54, 18, 20, 23, 37, 51, 129,
88, 75, 85, 126)), row.names = c(NA, -144L), .Names = c("cohort",
"month", "users"), class = "data.frame")
绘图应该类似于以下内容:
根据您提供的数据框,您的 month 列的因子水平(factor levels)顺序是正确的,
但 cohort 列的因子水平顺序不正确,因此请将 cohort 列的因子水平改为与 month 列相同。
library(ggplot2)

# Palette generator interpolating from light blue to dark blue.
blues <- colorRampPalette(c('lightblue', 'darkblue'))

# Change the factor level: reuse the level order of `month` for `cohort`, so
# the legend and fill colours follow the same order as the x axis instead of
# the alphabetical order the `cohort` factor was created with.
cohort.chart$cohort <- factor(cohort.chart$cohort,
                              levels = levels(cohort.chart$month))

# One colour per cohort level rather than a hard-coded 12, so the manual scale
# still receives enough values if the number of cohorts changes.
n_cohorts <- nlevels(cohort.chart$cohort)

p <- ggplot(cohort.chart, aes(x = month, y = users, group = cohort))
p + geom_area(aes(fill = cohort)) +
  scale_fill_manual(values = blues(n_cohorts)) +
  ggtitle('Users by cohort') +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
当您创建数据框时,R 会(自动)将字符串转换为因子,并按字母顺序为每个不同的 cohort
日期创建一个因子水平。
cohort.chart$cohort
[1]... 12 Levels: Apr 2017 Aug 2017 Dec 2016 Feb 2017 Jan 2017 Jul 2017 Jun 2017 Mar 2017 May 2017 ... Sep 2017
你可以使用 as.yearmon(来自 zoo 包)
来解决这个问题,只需要在之后再转换回因子即可。
# as.yearmon() is provided by the zoo package, so load it explicitly.
library(zoo)

# Parse the "Mon YYYY" labels into yearmon values, then convert back to a
# factor because the plot needs a discrete variable for `fill`.
# NOTE(review): this relies on as.factor() ordering the levels by the
# underlying yearmon values (i.e. chronologically) - confirm with
# levels(cohort.chart$cohort) after running.
cohort.chart$cohort <- as.yearmon(cohort.chart$cohort)
cohort.chart$cohort <- as.factor(cohort.chart$cohort)