识别 TRUE 和 FALSE 序列中最后一个 TRUE 的位置
Identifying positions of the last TRUEs in a sequence of TRUEs and FALSEs
我有一个 TRUE 和 FALSE 向量:
x <- c(F,F,F,T,T,T,F,F,F,T,T,T,F,T,T)
我想在最后一个 TRUE 变为 FALSE 之前优雅地(并在基础上)识别它的位置。
不过,以下工作似乎可以简化:
c((x[-1] != x[-length(x)]),T) & x
> FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
输入输出:
勾选rle
rlex = rle(x)
end = cumsum(rlex$lengths)
x&(seq(length(x)) %in% end)
[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
Frank 建议的另一种布局
seq_along(x) %in% with(rle(x), cumsum(lengths)[values])
[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
用于识别 FALSE
.
之前的最后一个 TRUE
的非 base
解决方案
library(dplyr)
y <- data.frame(x = c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,
FALSE,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE))
y %>%
mutate(lasttrue = case_when(x == TRUE & lead(x) == FALSE ~ TRUE,
TRUE ~ FALSE))
编辑:
y %>%
mutate(lasttrue = case_when(x > lead(x) ~ T,
T ~ F))
另一个版本rle
x[setdiff(seq_along(x), with(rle(x), cumsum(lengths) * values))] <- FALSE
x
#[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
利用 diff
和附加的 FALSE
在末尾捕获隐含的 TRUE
-to-FALSE
。
diff(c(x,FALSE)) == -1
# [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE
#[13] FALSE FALSE TRUE
我们可能会查看 x
大于移位 x
并附加 0
的地方。
x>c(x[-1],0)
# [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
选项duplicated
library(data.table)
!duplicated(rleid(x), fromLast = TRUE) & x
#[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
基准测试
感谢您提供所有解决方案。如果有人对基准测试感兴趣:
library(dplyr)
library(data.table)
set.seed(1)
x <- sample(c(TRUE, FALSE), 1000000, replace = T)
y <- data.frame(x = x) # For M. Viking's solution
x_dt <- x # For Ronak Shah's solution
microbenchmark::microbenchmark(Khaynes = {Khaynes <- c((x[-1] != x[-length(x)]),T) & x},
jay.sf = {jay.sf <- x>c(x[-1],0)},
jay.sf_2 = {jay.sf_2 <- diff(c(x,0))<0},
thelatemail = {thelatemail <- diff(c(x,FALSE)) == -1},
WeNYoBen = {rlex = rle(x); end = cumsum(rlex$lengths); WeNYoBen <- x&(seq(length(x)) %in% end)},
M._Viking = {M._Viking <- y %>% mutate(lasttrue = case_when(x > lead(x) ~ T, T ~ F))},
akrun = {akrun <- !duplicated(rleid(x), fromLast = TRUE) & x},
frank = {frank <- seq_along(x) %in% with(rle(x), cumsum(lengths)[values])},
Ronak_Shah = {x_dt[setdiff(seq_along(x_dt), with(rle(x_dt), cumsum(lengths) * values))] <- FALSE},
times = 50)
# Output:
# Unit: milliseconds
# expr min lq mean median uq max neval
# Khaynes 23.0283 26.5010 31.76180 31.71290 37.1449 46.3824 50
# jay.sf 13.0630 13.5373 17.84056 13.77135 20.5462 73.5926 50
# jay.sf_2 26.1960 27.7653 35.25296 36.39615 39.3686 61.8858 50
# thelatemail 24.8204 26.7178 32.51675 33.50165 36.6328 41.9279 50
# WeNYoBen 83.9070 98.4700 107.79965 101.88475 107.1933 170.2940 50
# M._Viking 73.5963 83.4467 93.99603 86.58535 94.0915 151.7075 50
# akrun 42.5265 43.2879 48.42697 44.98085 51.1533 105.2836 50
# frank 81.9115 90.1559 95.40261 93.97015 98.2921 129.6162 50
# Ronak_Shah 109.0678 121.8230 133.10690 125.63930 133.7222 231.5350 50
all.equal(Khaynes, jay.sf)
all.equal(Khaynes, jay.sf_2)
all.equal(Khaynes, thelatemail)
all.equal(Khaynes, WeNYoBen)
all.equal(Khaynes, M._Viking$lasttrue) # When the last element is TRUE it will return false.
all.equal(Khaynes, akrun)
all.equal(Khaynes, frank)
all.equal(Khaynes, x_dt) # Ronak Shah solution.
我有一个 TRUE 和 FALSE 向量:
x <- c(F,F,F,T,T,T,F,F,F,T,T,T,F,T,T)
我想在最后一个 TRUE 变为 FALSE 之前优雅地(并在基础上)识别它的位置。
不过,以下工作似乎可以简化:
c((x[-1] != x[-length(x)]),T) & x
> FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
输入输出:
勾选rle
rlex = rle(x)
end = cumsum(rlex$lengths)
x&(seq(length(x)) %in% end)
[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
Frank 建议的另一种布局
seq_along(x) %in% with(rle(x), cumsum(lengths)[values])
[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
用于识别 FALSE
.
TRUE
的非 base
解决方案
library(dplyr)
y <- data.frame(x = c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,
FALSE,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE))
y %>%
mutate(lasttrue = case_when(x == TRUE & lead(x) == FALSE ~ TRUE,
TRUE ~ FALSE))
编辑:
y %>%
mutate(lasttrue = case_when(x > lead(x) ~ T,
T ~ F))
另一个版本rle
x[setdiff(seq_along(x), with(rle(x), cumsum(lengths) * values))] <- FALSE
x
#[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
利用 diff
和附加的 FALSE
在末尾捕获隐含的 TRUE
-to-FALSE
。
diff(c(x,FALSE)) == -1
# [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE
#[13] FALSE FALSE TRUE
我们可能会查看 x
大于移位 x
并附加 0
的地方。
x>c(x[-1],0)
# [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
选项duplicated
library(data.table)
!duplicated(rleid(x), fromLast = TRUE) & x
#[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
基准测试
感谢您提供所有解决方案。如果有人对基准测试感兴趣:
library(dplyr)
library(data.table)
set.seed(1)
x <- sample(c(TRUE, FALSE), 1000000, replace = T)
y <- data.frame(x = x) # For M. Viking's solution
x_dt <- x # For Ronak Shah's solution
microbenchmark::microbenchmark(Khaynes = {Khaynes <- c((x[-1] != x[-length(x)]),T) & x},
jay.sf = {jay.sf <- x>c(x[-1],0)},
jay.sf_2 = {jay.sf_2 <- diff(c(x,0))<0},
thelatemail = {thelatemail <- diff(c(x,FALSE)) == -1},
WeNYoBen = {rlex = rle(x); end = cumsum(rlex$lengths); WeNYoBen <- x&(seq(length(x)) %in% end)},
M._Viking = {M._Viking <- y %>% mutate(lasttrue = case_when(x > lead(x) ~ T, T ~ F))},
akrun = {akrun <- !duplicated(rleid(x), fromLast = TRUE) & x},
frank = {frank <- seq_along(x) %in% with(rle(x), cumsum(lengths)[values])},
Ronak_Shah = {x_dt[setdiff(seq_along(x_dt), with(rle(x_dt), cumsum(lengths) * values))] <- FALSE},
times = 50)
# Output:
# Unit: milliseconds
# expr min lq mean median uq max neval
# Khaynes 23.0283 26.5010 31.76180 31.71290 37.1449 46.3824 50
# jay.sf 13.0630 13.5373 17.84056 13.77135 20.5462 73.5926 50
# jay.sf_2 26.1960 27.7653 35.25296 36.39615 39.3686 61.8858 50
# thelatemail 24.8204 26.7178 32.51675 33.50165 36.6328 41.9279 50
# WeNYoBen 83.9070 98.4700 107.79965 101.88475 107.1933 170.2940 50
# M._Viking 73.5963 83.4467 93.99603 86.58535 94.0915 151.7075 50
# akrun 42.5265 43.2879 48.42697 44.98085 51.1533 105.2836 50
# frank 81.9115 90.1559 95.40261 93.97015 98.2921 129.6162 50
# Ronak_Shah 109.0678 121.8230 133.10690 125.63930 133.7222 231.5350 50
all.equal(Khaynes, jay.sf)
all.equal(Khaynes, jay.sf_2)
all.equal(Khaynes, thelatemail)
all.equal(Khaynes, WeNYoBen)
all.equal(Khaynes, M._Viking$lasttrue) # When the last element is TRUE it will return false.
all.equal(Khaynes, akrun)
all.equal(Khaynes, frank)
all.equal(Khaynes, x_dt) # Ronak Shah solution.