如何在R中为ggplot中的矩形自动找到序列的开始和结束
How to find the start and the end of sequences automatically in R for rectangles in ggplot
我正在尝试用一些阴影矩形绘制一些数据。
数据框 df
如下所示:
df <- data.frame(time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b"))
对于图中的矩形,我定义了一个名为 dfRect
的对象,其中包含变量 xmin
和 xmax
.
dfRect <- data.frame(xmin = c(0.3, 1.0, 1.9), xmax = c(0.7, 1.5, 2.0))
问题是我必须为矩形的开始和结束手动找到 xmin
和 xmax
。矩形在 type
列中的 b
时间序列的开始处开始 (xmin
),并在 b
相同时间序列的末尾结束。单个b
可以忽略。
这是情节,让您了解我要完成的事情:
ggplot() +
geom_rect(data = dfRect,
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
fill = "yellow", alpha = 0.4) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
所以最后的问题是。我如何自动化定义 xmin
和 xmax
的过程并自动创建 dfRect 以便我不必自己定义它?
试试这个。我在代码中添加了一些解释性注释:
library(dplyr)
library(tidyr)
library(ggplot2)
df <- data.frame(time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b"))
dfRect <- df %>%
arrange(time, type) %>%
# Get start and end of sequences
mutate(is_b_start = type == "b" & lag(type) != "b",
is_b_end = type != "b" & lag(type) == "b") %>%
filter(is_b_start | is_b_end) %>%
# Get id of sequences
mutate(id = cumsum(is_b_start),
type = ifelse(is_b_start, "min", "max")) %>%
select(time, id, type) %>%
# To wide format gives xmin and xmax for each sequence
tidyr::pivot_wider(names_from = type, names_prefix = "x", values_from = time) %>%
# In case: Fill last with max time
tidyr::replace_na(list(xmax = max(df$time)))
ggplot() +
geom_rect(data = dfRect,
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
fill = "yellow", alpha = 0.4) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
由 reprex package (v0.3.0)
于 2020-06-21 创建
这是一种使用 运行 长度编码的方法。
library(ggplot2)
df <- data.frame(time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b"))
# Convert to runlength encoding
rle <- rle(df$type == "b")
# Ignoring the single "b"s
rle$values[rle$lengths == 1 & rle$values] <- FALSE
# Determine starts and ends
starts <- {ends <- cumsum(rle$lengths)} - rle$lengths + 1
# Build a data.frame from the rle
dfrect <- data.frame(
xmin = df$time[starts],
# We have to +1 the ends, because the linepieces end at the next datapoint
# Though we should not index out-of-bounds, so we need to cap at the last end
xmax = df$time[pmin(ends + 1, max(ends))],
fill = rle$values
)
此图让我们了解了上面代码中所做的事情:
ggplot() +
geom_rect(data = dfrect,
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf, fill = fill),
alpha = 0.4) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
要获得您想要的内容,您需要过滤掉 FALSE
s。
ggplot() +
geom_rect(data = dfrect[dfrect$fill,],
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
alpha = 0.4, fill = "yellow") +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
如果您正在寻找可以为您计算的统计数据,请查看 here。免责声明:我写了那个函数,它做的事情与我上面发布的代码类似。
rle
的另一种方法,但使用 data.table::rleid。
旋转创意来自 Stefan!
我相信管道可以以某种方式进一步缩短
library(tidyverse)
df <- data.frame(
time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b")
)
dfRect <-
df %>%
arrange(time, type) %>%
mutate(id = data.table::rleid(type)) %>%
group_by(type, id) %>%
slice(c(1, n())) %>%
distinct(time, id) %>%
filter(type == "b" & n() > 1) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = row, names_prefix = "x", values_from = time)
ggplot() +
geom_rect(
data = dfRect,
aes(xmin = x1, xmax = x2, ymin = -Inf, ymax = Inf),
fill = "yellow", alpha = 0.4
) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
我正在尝试用一些阴影矩形绘制一些数据。
数据框 df
如下所示:
df <- data.frame(time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b"))
对于图中的矩形,我定义了一个名为 dfRect
的对象,其中包含变量 xmin
和 xmax
.
dfRect <- data.frame(xmin = c(0.3, 1.0, 1.9), xmax = c(0.7, 1.5, 2.0))
问题是我必须为矩形的开始和结束手动找到 xmin
和 xmax
。矩形在 type
列中的 b
时间序列的开始处开始 (xmin
),并在 b
相同时间序列的末尾结束。单个b
可以忽略。
这是情节,让您了解我要完成的事情:
ggplot() +
geom_rect(data = dfRect,
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
fill = "yellow", alpha = 0.4) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
所以最后的问题是。我如何自动化定义 xmin
和 xmax
的过程并自动创建 dfRect 以便我不必自己定义它?
试试这个。我在代码中添加了一些解释性注释:
library(dplyr)
library(tidyr)
library(ggplot2)
df <- data.frame(time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b"))
dfRect <- df %>%
arrange(time, type) %>%
# Get start and end of sequences
mutate(is_b_start = type == "b" & lag(type) != "b",
is_b_end = type != "b" & lag(type) == "b") %>%
filter(is_b_start | is_b_end) %>%
# Get id of sequences
mutate(id = cumsum(is_b_start),
type = ifelse(is_b_start, "min", "max")) %>%
select(time, id, type) %>%
# To wide format gives xmin and xmax for each sequence
tidyr::pivot_wider(names_from = type, names_prefix = "x", values_from = time) %>%
# In case: Fill last with max time
tidyr::replace_na(list(xmax = max(df$time)))
ggplot() +
geom_rect(data = dfRect,
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
fill = "yellow", alpha = 0.4) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
由 reprex package (v0.3.0)
于 2020-06-21 创建这是一种使用 运行 长度编码的方法。
library(ggplot2)
df <- data.frame(time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b"))
# Convert to runlength encoding
rle <- rle(df$type == "b")
# Ignoring the single "b"s
rle$values[rle$lengths == 1 & rle$values] <- FALSE
# Determine starts and ends
starts <- {ends <- cumsum(rle$lengths)} - rle$lengths + 1
# Build a data.frame from the rle
dfrect <- data.frame(
xmin = df$time[starts],
# We have to +1 the ends, because the linepieces end at the next datapoint
# Though we should not index out-of-bounds, so we need to cap at the last end
xmax = df$time[pmin(ends + 1, max(ends))],
fill = rle$values
)
此图让我们了解了上面代码中所做的事情:
ggplot() +
geom_rect(data = dfrect,
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf, fill = fill),
alpha = 0.4) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
要获得您想要的内容,您需要过滤掉 FALSE
s。
ggplot() +
geom_rect(data = dfrect[dfrect$fill,],
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
alpha = 0.4, fill = "yellow") +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)
如果您正在寻找可以为您计算的统计数据,请查看 here。免责声明:我写了那个函数,它做的事情与我上面发布的代码类似。
rle
的另一种方法,但使用 data.table::rleid。
旋转创意来自 Stefan!
我相信管道可以以某种方式进一步缩短
library(tidyverse)
df <- data.frame(
time = seq(0.1, 2, 0.1),
speed = c(seq(0.5, 5, 0.5), seq(5, 0.5, -0.5)),
type = c("a", "a", "b", "b", "b", "b", "c", "c", "c", "b", "b", "b", "b", "b", "c", "a", "b", "c", "b", "b")
)
dfRect <-
df %>%
arrange(time, type) %>%
mutate(id = data.table::rleid(type)) %>%
group_by(type, id) %>%
slice(c(1, n())) %>%
distinct(time, id) %>%
filter(type == "b" & n() > 1) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = row, names_prefix = "x", values_from = time)
ggplot() +
geom_rect(
data = dfRect,
aes(xmin = x1, xmax = x2, ymin = -Inf, ymax = Inf),
fill = "yellow", alpha = 0.4
) +
geom_line(data = df, aes(x = time, y = speed, color = type, group = 1), size = 3)