在 R 中按行筛选数据框,只保留最长的连续重复数字序列
Filter dataframe for longest sequence of repeated numbers by row in r
我正在尝试创建一个 "filter-by"(筛选)矩阵,用它来隔离数据框中各行的数据:每一行只保留最长的一段连续相同值所对应的那些值,其余位置全部置为零。四处搜索之后,我认为应该使用 rle 函数,但它并没有给出我想要的结果。下面是我的代码和结果示例。非常感谢任何建议和解决方案。谢谢!
样本数据:
# Sample input: seven 0/1 indicator vectors, each of length 8.
# Note: the variable named `c` does not break later `c(...)` calls, because R
# skips non-function bindings when it looks up a function being called.
a <- c(1, 0, 1, 1, 1, 1, 0, 0)
b <- c(0, 0, 0, 1, 1, 1, 0, 1)
c <- c(0, 0, 1, 1, 0, 0, 0, 1)
d <- c(1, 0, 0, 1, 1, 1, 1, 0)
e <- c(1, 0, 0, 1, 0, 0, 1, 1)
f <- c(0, 0, 0, 1, 1, 1, 0, 1)
g <- c(0, 0, 1, 1, 0, 0, 0, 1)
# One column per vector; column names are taken from the variable names.
test.data <- data.frame(a, b, c, d, e, f, g)
# > test.data
# a b c d e f g
# 1 1 0 0 1 1 0 0
# 2 0 0 0 0 0 0 0
# 3 1 0 1 0 0 0 1
# 4 1 1 1 1 1 1 1
# 5 1 1 0 1 0 1 0
# 6 1 1 0 1 0 1 0
# 7 0 0 0 1 1 0 0
# 8 0 1 1 0 1 1 1
尝试解决方案的示例代码:
# Asker's attempt, kept verbatim. It was presumably meant to zero out every run
# except the longest one, but it returns each input vector unchanged.
# NOTE: lapply() over a data frame visits one column at a time, so runs are
# measured down the columns here, not across the rows.
result <- data.frame(lapply(test.data, function(x) {
# Run-length encode the vector into $lengths and $values.
r <- rle(x)
# This line only evaluates a comparison (`==`) and discards the result;
# nothing is assigned, so `r` is left untouched.
r$values[r$lengths!=max(r$lengths)]==1
# Decoding the unmodified encoding reproduces `x` exactly.
r2=inverse.rle(r)
r2
}))
我得到的结果(看起来像输入内容的精确副本?):
# > result
# a b c d e f g
# 1 1 0 0 1 1 0 0
# 2 0 0 0 0 0 0 0
# 3 1 0 1 0 0 0 1
# 4 1 1 1 1 1 1 1
# 5 1 1 0 1 0 1 0
# 6 1 1 0 1 0 1 0
# 7 0 0 0 1 1 0 0
# 8 0 1 1 0 1 1 1
这是我想要得到的结果(如果更简单,可以使用 T/F 代替 1 和 0):
# > result
# a b c d e f g
# 1 0 0 0 1 1 0 0
# 2 0 0 0 0 0 0 0
# 3 0 0 0 0 0 0 0
# 4 1 1 1 1 1 1 1
# 5 1 1 0 0 0 0 0
# 6 1 1 0 0 0 0 0
# 7 0 0 0 1 1 0 0
# 8 0 0 0 0 1 1 1
请多多指教!
我想这就是你想要的...
# Row-wise filter: in every row keep only the longest run of consecutive 1s
# (runs of length 1 do not count) and set every other position to 0.
# If several runs tie for the longest length, all of them are kept.
# apply() returns one column per input row, hence the t(); assigning through
# `test.data[] <-` overwrites the values while keeping the data-frame
# structure and column names.
test.data[] <- t(apply(test.data, 1, function(x) {
  runs <- rle(x)
  # An isolated value is not a sequence, so clear single-element runs first.
  runs$values[runs$lengths == 1] <- 0
  # Rows without any remaining run of 1s would make max() operate on an empty
  # vector (-Inf plus a warning); use 0 instead, which matches no run length
  # and therefore clears the whole row without the warning.
  ones <- which(runs$values == 1)
  longest <- if (length(ones) > 0) max(runs$lengths[ones]) else 0L
  # Clear every run whose length differs from the longest run of 1s.
  runs$values[runs$lengths != longest] <- 0
  inverse.rle(runs)
}))
test.data
a b c d e f g
1 0 0 0 1 1 0 0
2 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0
4 1 1 1 1 1 1 1
5 1 1 0 0 0 0 0
6 1 1 0 0 0 0 0
7 0 0 0 1 1 0 0
8 0 0 0 0 1 1 1
library(magrittr)
# Value whose longest consecutive run should be kept in each row.
val <- 1
# For every row, keep only the FIRST longest run of `val` (runs of length 1 do
# not count) and set all other positions to 0. apply() yields one column per
# input row, so the result is transposed back; it is a plain matrix.
test.data %>%
  apply(1, function(x) {
    runs <- rle(x)
    is_val <- runs$values == val
    # No occurrence of `val` at all: the whole row becomes zeros.
    if (!any(is_val)) {
      return(rep(0, length(x)))
    }
    longest <- max(runs$lengths[is_val])
    # Keep only the longest runs of `val`; the (longest > 1) factor wipes
    # everything when the longest run is a single element.
    kept <- (runs$lengths == longest & is_val) * runs$values * (longest > 1)
    # Of several equally long runs, keep only the first one. When nothing was
    # kept, `first_hit` is NA and the assignment below changes nothing.
    first_hit <- which(kept == val)[1]
    kept[seq_along(kept) != first_hit] <- 0
    rep(kept, runs$lengths)
  }) %>%
  t()
# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# [1,] 0 0 0 1 1 0 0
# [2,] 0 0 0 0 0 0 0
# [3,] 0 0 0 0 0 0 0
# [4,] 1 1 1 1 1 1 1
# [5,] 1 1 0 0 0 0 0
# [6,] 1 1 0 0 0 0 0
# [7,] 0 0 0 1 1 0 0
# [8,] 0 0 0 0 1 1 1
我正在尝试创建一个 "filter-by"(筛选)矩阵,用它来隔离数据框中各行的数据:每一行只保留最长的一段连续相同值所对应的那些值,其余位置全部置为零。四处搜索之后,我认为应该使用 rle 函数,但它并没有给出我想要的结果。下面是我的代码和结果示例。非常感谢任何建议和解决方案。谢谢!
样本数据:
# Sample input: seven 0/1 indicator vectors, each of length 8.
# Note: the variable named `c` does not break later `c(...)` calls, because R
# skips non-function bindings when it looks up a function being called.
a <- c(1, 0, 1, 1, 1, 1, 0, 0)
b <- c(0, 0, 0, 1, 1, 1, 0, 1)
c <- c(0, 0, 1, 1, 0, 0, 0, 1)
d <- c(1, 0, 0, 1, 1, 1, 1, 0)
e <- c(1, 0, 0, 1, 0, 0, 1, 1)
f <- c(0, 0, 0, 1, 1, 1, 0, 1)
g <- c(0, 0, 1, 1, 0, 0, 0, 1)
# One column per vector; column names are taken from the variable names.
test.data <- data.frame(a, b, c, d, e, f, g)
# > test.data
# a b c d e f g
# 1 1 0 0 1 1 0 0
# 2 0 0 0 0 0 0 0
# 3 1 0 1 0 0 0 1
# 4 1 1 1 1 1 1 1
# 5 1 1 0 1 0 1 0
# 6 1 1 0 1 0 1 0
# 7 0 0 0 1 1 0 0
# 8 0 1 1 0 1 1 1
尝试解决方案的示例代码:
# Asker's attempt, kept verbatim. It was presumably meant to zero out every run
# except the longest one, but it returns each input vector unchanged.
# NOTE: lapply() over a data frame visits one column at a time, so runs are
# measured down the columns here, not across the rows.
result <- data.frame(lapply(test.data, function(x) {
# Run-length encode the vector into $lengths and $values.
r <- rle(x)
# This line only evaluates a comparison (`==`) and discards the result;
# nothing is assigned, so `r` is left untouched.
r$values[r$lengths!=max(r$lengths)]==1
# Decoding the unmodified encoding reproduces `x` exactly.
r2=inverse.rle(r)
r2
}))
我得到的结果(看起来像输入内容的精确副本?):
# > result
# a b c d e f g
# 1 1 0 0 1 1 0 0
# 2 0 0 0 0 0 0 0
# 3 1 0 1 0 0 0 1
# 4 1 1 1 1 1 1 1
# 5 1 1 0 1 0 1 0
# 6 1 1 0 1 0 1 0
# 7 0 0 0 1 1 0 0
# 8 0 1 1 0 1 1 1
这是我想要得到的结果(如果更简单,可以使用 T/F 代替 1 和 0):
# > result
# a b c d e f g
# 1 0 0 0 1 1 0 0
# 2 0 0 0 0 0 0 0
# 3 0 0 0 0 0 0 0
# 4 1 1 1 1 1 1 1
# 5 1 1 0 0 0 0 0
# 6 1 1 0 0 0 0 0
# 7 0 0 0 1 1 0 0
# 8 0 0 0 0 1 1 1
请多多指教!
我想这就是你想要的...
# Row-wise filter: in every row keep only the longest run of consecutive 1s
# (runs of length 1 do not count) and set every other position to 0.
# If several runs tie for the longest length, all of them are kept.
# apply() returns one column per input row, hence the t(); assigning through
# `test.data[] <-` overwrites the values while keeping the data-frame
# structure and column names.
test.data[] <- t(apply(test.data, 1, function(x) {
  runs <- rle(x)
  # An isolated value is not a sequence, so clear single-element runs first.
  runs$values[runs$lengths == 1] <- 0
  # Rows without any remaining run of 1s would make max() operate on an empty
  # vector (-Inf plus a warning); use 0 instead, which matches no run length
  # and therefore clears the whole row without the warning.
  ones <- which(runs$values == 1)
  longest <- if (length(ones) > 0) max(runs$lengths[ones]) else 0L
  # Clear every run whose length differs from the longest run of 1s.
  runs$values[runs$lengths != longest] <- 0
  inverse.rle(runs)
}))
test.data
a b c d e f g
1 0 0 0 1 1 0 0
2 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0
4 1 1 1 1 1 1 1
5 1 1 0 0 0 0 0
6 1 1 0 0 0 0 0
7 0 0 0 1 1 0 0
8 0 0 0 0 1 1 1
library(magrittr)
# Value whose longest consecutive run should be kept in each row.
val <- 1
# For every row, keep only the FIRST longest run of `val` (runs of length 1 do
# not count) and set all other positions to 0. apply() yields one column per
# input row, so the result is transposed back; it is a plain matrix.
test.data %>%
  apply(1, function(x) {
    runs <- rle(x)
    is_val <- runs$values == val
    # No occurrence of `val` at all: the whole row becomes zeros.
    if (!any(is_val)) {
      return(rep(0, length(x)))
    }
    longest <- max(runs$lengths[is_val])
    # Keep only the longest runs of `val`; the (longest > 1) factor wipes
    # everything when the longest run is a single element.
    kept <- (runs$lengths == longest & is_val) * runs$values * (longest > 1)
    # Of several equally long runs, keep only the first one. When nothing was
    # kept, `first_hit` is NA and the assignment below changes nothing.
    first_hit <- which(kept == val)[1]
    kept[seq_along(kept) != first_hit] <- 0
    rep(kept, runs$lengths)
  }) %>%
  t()
# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# [1,] 0 0 0 1 1 0 0
# [2,] 0 0 0 0 0 0 0
# [3,] 0 0 0 0 0 0 0
# [4,] 1 1 1 1 1 1 1
# [5,] 1 1 0 0 0 0 0
# [6,] 1 1 0 0 0 0 0
# [7,] 0 0 0 1 1 0 0
# [8,] 0 0 0 0 1 1 1