在 ggplot2 中填充热图(24 小时乘 7 天)
fill a heat map (24h by 7days) in ggplot2
我有这样的自行车数据 - 数据框的尺寸很大。
> dim(All_2014)
[1] 994367 10
> head(All_2014)
X bikeid end.station.id start.station.id diff.time stoptime starttime
1 1 16379 285 356 338387 2014-01-02 15:22:28 2014-01-06 13:22:15
2 2 16379 361 146 47631 2014-01-09 22:45:34 2014-01-10 11:59:25
3 3 16379 268 327 5089 2014-01-10 12:35:22 2014-01-10 14:00:11
4 4 16379 398 324 715924 2014-01-22 14:34:55 2014-01-30 21:26:59
5 5 15611 536 445 716031 2014-01-02 15:30:44 2014-01-10 22:24:35
6 6 15611 348 433 68544 2014-01-12 14:03:01 2014-01-13 09:05:25
midtime Hour Day
1 2014-01-04 14:22:21 14 Saturday
2 2014-01-10 05:22:29 5 Friday
3 2014-01-10 13:17:46 13 Friday
4 2014-01-26 18:00:57 18 Sunday
5 2014-01-06 18:57:39 18 Monday
6 2014-01-12 23:34:13 23 Sunday
我的目标是使用 ggplot2
(或其他更合适的包)创建一个类似下图的热图,其中星期几在 y 轴上,小时在 x 轴上(小时不必使用 AM/PM,可以保持 24 小时制):
方框的填充颜色表示百分比,即给定小时区间内的乘车次数除以一周中该天的总乘车次数。我已经把数据整理到了上面这一步,但想知道计算这些百分比的最简单方法,以及如何用它们创建热图。
用dplyr做计算,用ggplot2做图表:
library(dplyr)
library(ggplot2)

## First simulate some data. The seed is fixed so that the example gives
## the same numbers (and the same plot) on every run.
set.seed(42)
n_rides <- 10000
rider_num <- seq_len(n_rides)
day_labels <- c("Sun", "Mon", "Tues", "Wed", "Thur", "Fri", "Sat")
## The levels are reversed so that Sun is drawn at the top of the y axis
## and Sat at the bottom.
days <- factor(day_labels, levels = rev(day_labels), ordered = TRUE)
## Relative sampling weights, one per element of `days` (Sun ... Sat).
day <- sample(days, n_rides, replace = TRUE,
              prob = c(0.3, 0.5, 0.8, 0.8, 0.6, 0.5, 0.2))
## Non-central beta draws scaled and rounded to whole hours in 0..23.
hour <- round(rbeta(n_rides, shape1 = 1, shape2 = 2, ncp = 6) * 23)
df <- data.frame(rider_num, hour, day)

## Count rides per (day, hour) cell, then express each count as a
## percentage of that day's total. The grouping by day is spelled out
## (instead of relying on summarise() silently dropping the last grouping
## level) and removed again so df2 is a plain, ungrouped table.
df2 <- df %>%
  count(day, hour) %>%
  group_by(day) %>%
  mutate(percent_of_riders = n / sum(n) * 100) %>%
  ungroup()

## Plot using ggplot and geom_tile, tweaking colours and theme elements
## to your liking. `hour` is numeric, so a continuous x scale is needed
## for the 0..23 breaks to be drawn.
ggplot(df2, aes(hour, day)) +
  geom_tile(aes(fill = percent_of_riders), colour = "white") +
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  scale_x_continuous(breaks = 0:23) +
  theme_minimal() +
  theme(legend.position = "bottom", legend.key.width = unit(2, "cm"),
        panel.grid = element_blank()) +
  coord_equal()
使用@andyteucher 的 df2
,这是一个 lattice
方法:
library(lattice)
library(RColorBrewer)
# Heat map of percent_of_riders with hour on the x axis and day on the y axis.
levelplot(
  percent_of_riders ~ hour + day,
  data = df2,
  aspect = "iso",
  xlab = "",
  ylab = "",
  border = "white",
  # colour ramp interpolated from the 9-class YlGnBu Brewer palette
  col.regions = colorRampPalette(brewer.pal(9, "YlGnBu")),
  # 100 evenly spaced breaks for the colour ramp, covering 0 to 12
  at = seq(0, 12, length.out = 100),
  scales = list(
    alternating = FALSE,
    tck = c(1, 0),
    x = list(at = 0:23)  # one tick per hour
  )
)
用零替换缺失数据(例如周日午夜)的一种简单方法是将 xtabs
对象传递给 levelplot
:
# Cross-tabulating first gives every hour/day combination a cell, so
# combinations absent from df2 show up as 0 instead of being left blank.
levelplot(
  xtabs(percent_of_riders ~ hour + day, data = df2),
  aspect = "iso",
  xlab = "",
  ylab = "",
  border = "white",
  col.regions = colorRampPalette(brewer.pal(9, "YlGnBu")),
  # 100 evenly spaced breaks for the colour ramp, covering 0 to 12
  at = seq(0, 12, length.out = 100),
  scales = list(alternating = FALSE, tck = c(1, 0))
)
您还可以使用 d3heatmap
制作可交互的热图:
library(d3heatmap)
# Day-by-hour table of percentages (rows = day, columns = hour).
xt <- xtabs(percent_of_riders ~ day + hour, data = df2)
# Reverse the row order before plotting; no dendrograms are drawn.
d3heatmap(
  xt[rev(seq_len(nrow(xt))), ],
  colors = "YlGnBu",
  dendrogram = "none"
)
我有这样的自行车数据 - 数据框的尺寸很大。
> dim(All_2014)
[1] 994367 10
> head(All_2014)
X bikeid end.station.id start.station.id diff.time stoptime starttime
1 1 16379 285 356 338387 2014-01-02 15:22:28 2014-01-06 13:22:15
2 2 16379 361 146 47631 2014-01-09 22:45:34 2014-01-10 11:59:25
3 3 16379 268 327 5089 2014-01-10 12:35:22 2014-01-10 14:00:11
4 4 16379 398 324 715924 2014-01-22 14:34:55 2014-01-30 21:26:59
5 5 15611 536 445 716031 2014-01-02 15:30:44 2014-01-10 22:24:35
6 6 15611 348 433 68544 2014-01-12 14:03:01 2014-01-13 09:05:25
midtime Hour Day
1 2014-01-04 14:22:21 14 Saturday
2 2014-01-10 05:22:29 5 Friday
3 2014-01-10 13:17:46 13 Friday
4 2014-01-26 18:00:57 18 Sunday
5 2014-01-06 18:57:39 18 Monday
6 2014-01-12 23:34:13 23 Sunday
我的目标是使用 ggplot2
(或其他更合适的包)创建一个类似下图的热图,其中星期几在 y 轴上,小时在 x 轴上(小时不必使用 AM/PM,可以保持 24 小时制):
方框的填充颜色表示百分比,即给定小时区间内的乘车次数除以一周中该天的总乘车次数。我已经把数据整理到了上面这一步,但想知道计算这些百分比的最简单方法,以及如何用它们创建热图。
用dplyr做计算,用ggplot2做图表:
library(dplyr)
library(ggplot2)

## First simulate some data. The seed is fixed so that the example gives
## the same numbers (and the same plot) on every run.
set.seed(42)
n_rides <- 10000
rider_num <- seq_len(n_rides)
day_labels <- c("Sun", "Mon", "Tues", "Wed", "Thur", "Fri", "Sat")
## The levels are reversed so that Sun is drawn at the top of the y axis
## and Sat at the bottom.
days <- factor(day_labels, levels = rev(day_labels), ordered = TRUE)
## Relative sampling weights, one per element of `days` (Sun ... Sat).
day <- sample(days, n_rides, replace = TRUE,
              prob = c(0.3, 0.5, 0.8, 0.8, 0.6, 0.5, 0.2))
## Non-central beta draws scaled and rounded to whole hours in 0..23.
hour <- round(rbeta(n_rides, shape1 = 1, shape2 = 2, ncp = 6) * 23)
df <- data.frame(rider_num, hour, day)

## Count rides per (day, hour) cell, then express each count as a
## percentage of that day's total. The grouping by day is spelled out
## (instead of relying on summarise() silently dropping the last grouping
## level) and removed again so df2 is a plain, ungrouped table.
df2 <- df %>%
  count(day, hour) %>%
  group_by(day) %>%
  mutate(percent_of_riders = n / sum(n) * 100) %>%
  ungroup()

## Plot using ggplot and geom_tile, tweaking colours and theme elements
## to your liking. `hour` is numeric, so a continuous x scale is needed
## for the 0..23 breaks to be drawn.
ggplot(df2, aes(hour, day)) +
  geom_tile(aes(fill = percent_of_riders), colour = "white") +
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  scale_x_continuous(breaks = 0:23) +
  theme_minimal() +
  theme(legend.position = "bottom", legend.key.width = unit(2, "cm"),
        panel.grid = element_blank()) +
  coord_equal()
使用@andyteucher 的 df2
,这是一个 lattice
方法:
library(lattice)
library(RColorBrewer)
# Heat map of percent_of_riders with hour on the x axis and day on the y axis.
levelplot(
  percent_of_riders ~ hour + day,
  data = df2,
  aspect = "iso",
  xlab = "",
  ylab = "",
  border = "white",
  # colour ramp interpolated from the 9-class YlGnBu Brewer palette
  col.regions = colorRampPalette(brewer.pal(9, "YlGnBu")),
  # 100 evenly spaced breaks for the colour ramp, covering 0 to 12
  at = seq(0, 12, length.out = 100),
  scales = list(
    alternating = FALSE,
    tck = c(1, 0),
    x = list(at = 0:23)  # one tick per hour
  )
)
用零替换缺失数据(例如周日午夜)的一种简单方法是将 xtabs
对象传递给 levelplot
:
# Cross-tabulating first gives every hour/day combination a cell, so
# combinations absent from df2 show up as 0 instead of being left blank.
levelplot(
  xtabs(percent_of_riders ~ hour + day, data = df2),
  aspect = "iso",
  xlab = "",
  ylab = "",
  border = "white",
  col.regions = colorRampPalette(brewer.pal(9, "YlGnBu")),
  # 100 evenly spaced breaks for the colour ramp, covering 0 to 12
  at = seq(0, 12, length.out = 100),
  scales = list(alternating = FALSE, tck = c(1, 0))
)
您还可以使用 d3heatmap
制作可交互的热图:
library(d3heatmap)
# Day-by-hour table of percentages (rows = day, columns = hour).
xt <- xtabs(percent_of_riders ~ day + hour, data = df2)
# Reverse the row order before plotting; no dendrograms are drawn.
d3heatmap(
  xt[rev(seq_len(nrow(xt))), ],
  colors = "YlGnBu",
  dendrogram = "none"
)