在lists of lists中查找所有匹配的列表
Find all the matched list in lists of lists
我有一个向量列表
我想找出所有相同的向量,即那些包含完全相同元素的向量,并保留每个向量在列表中的位置编号(在 R 中)。如果可能,最好用一行命令完成。
这是我的清单:
mylist<-list(c("yes", "no"), c("no", "other", "up",
"down"), c("no", "yes"), c("no",
"yes"), c("no", "yes", "maybe"), c("no",
"yes", "maybe"), c("no", "yes", "maybe"))
期望的输出:
常见的列表是:匹配 1: 1,3,4
匹配 2: 5,6,7
duplicated
接受列表作为其主要参数。所以你可以使用
which(duplicated(mylist1) | duplicated(mylist1, fromLast=TRUE))
[1] 3 4 5 6 7
这适用于您的第一个示例。请注意,这不会区分彼此相同的列表元素分属哪一组,而只会对存在完全相同副本的元素返回 TRUE。
对于第二个示例数据集,您可以使用以下方法查找组的位置
# get group values as integers
# Each element is reduced to a canonical key: its strings are sorted (so the
# order inside a vector does not matter) and joined with a unit-separator
# control character. Joining with "" would let distinct vectors such as
# c("ab", "c") and c("a", "bc") collapse to the same key. vapply() guarantees
# a character vector of keys even when mylist2 is empty.
groups <- as.integer(factor(vapply(
  mylist2,
  function(x) paste(sort(x), collapse = "\x1f"),
  character(1)
)))
# return list of groups
# split() collects the positions belonging to each group code; unname() drops
# the "1", "2", ... labels so the result is a plain list of index vectors.
unname(split(seq_along(mylist2), groups))
[[1]]
[1] 2
[[2]]
[1] 5 6 7
[[3]]
[1] 1 3 4
数据
mylist1 <-
list(c("yes", "no"), c("no", "other", "up", "down"), c("no",
"yes", "maybe"), c("no", "yes", "maybe"), c("no", "yes", "maybe"
), c("no", "yes", "maybe"), c("no", "yes", "maybe"))
mylist2 <-
list(c("yes", "no"), c("no", "other", "up", "down"), c("no",
"yes"), c("no", "yes"), c("no", "yes", "maybe"), c("no", "yes",
"maybe"), c("no", "yes", "maybe"))
这对我有用:
mylist<-list(c("yes", "no"), c("no", "other", "up",
"down"), c("no", "yes"), c("no",
"yes"), c("no", "yes", "maybe"), c("no",
"yes", "maybe"), c("no", "yes", "maybe"))
library(dplyr)
# function to create a dataframe from your list. Might not be the most efficient way to do this.
# Convert a list of vectors into a data frame with one row per list element.
# Vectors shorter than the longest one are right-padded with NA so that every
# row has the same number of columns.
#
# data: a list of atomic vectors.
# Returns a data.frame with length(data) rows and max(lengths(data)) columns;
# an empty list yields an empty data frame.
f <- function(data) {
  # Without this guard max() of an empty vector is -Inf (with a warning) and
  # the matrix() call below fails.
  if (length(data) == 0) {
    return(data.frame())
  }
  n_col <- max(lengths(data))
  padded <- lapply(data, function(v) c(v, rep(NA, n_col - length(v))))
  # byrow = TRUE: each padded vector becomes one row of the matrix.
  cells <- matrix(unlist(padded), nrow = length(data), ncol = n_col, byrow = TRUE)
  data.frame(cells)
}
# Turn the list into a padded data frame, then build one string key per row by
# joining the row's cells with "-".
# NOTE: the cells are joined in their original order, so c("yes", "no") and
# c("no", "yes") receive different keys and end up in different groups.
df <- f(mylist)
df$key <- apply(df, 1, function(cells) paste(cells, collapse = "-"))
# One row per distinct key, together with the number of times it occurs.
df_total <- summarise(group_by(df, key), n = n())
# For every distinct key, collect the row positions that carry that key.
result <- lapply(df_total$key, function(k) which(df$key == k))
结果:
> result
[[1]]
[1] 2
[[2]]
[1] 5 6 7
[[3]]
[1] 3 4
[[4]]
[1] 1
希望对您有所帮助!
这是一个使用split
的选项
# Group the positions of mylist by a canonical key (elements sorted, then
# joined with ", ") and keep only the groups that hold more than one position.
# vapply() always yields a character vector of keys, so an empty mylist gives
# an empty result instead of handing split() a list.
Filter(function(idx) length(idx) > 1,
       split(seq_along(mylist),
             vapply(mylist, function(x) toString(sort(x)), character(1))))
#$`maybe, no, yes`
#[1] 5 6 7
#$`no, yes`
#[1] 1 3 4
数据
mylist <- list(c("yes", "no"), c("no", "other", "up", "down"), c("no", "yes"),
c("no", "yes"), c("no", "yes", "maybe"), c("no", "yes", "maybe"),
c("no", "yes", "maybe"))
一个(较长的)单行命令:
# For every distinct key (elements of a vector sorted and joined with " "),
# return the positions in mylist whose key equals it. The key vector is built
# twice: once for unique() and once inside the comparison.
sapply(unique(unlist(lapply(mylist, function(x) paste(sort(x), collapse = " ")))), function(y) which(y == unlist(lapply(mylist, function(x) paste(sort(x), collapse = " ")))))
输出:
$`no yes`
[1] 1 3 4
$`down no other up`
[1] 2
$`maybe no yes`
[1] 5 6 7
这是一个有趣的问题。您可以使用 qdapTools
包中的 mtabulate
来获取以下数据框,
d1 <- qdapTools::mtabulate(mylist)
d1
# down maybe no other up yes
#1 0 0 1 0 0 1
#2 1 0 1 1 1 0
#3 0 0 1 0 0 1
#4 0 0 1 0 0 1
#5 0 1 1 0 0 1
#6 0 1 1 0 0 1
#7 0 1 1 0 0 1
然后可以粘贴拆分,
l1 <- split(d1, do.call(paste, d1))
l1
#$`0 0 1 0 0 1`
# down maybe no other up yes
#1 0 0 1 0 0 1
#3 0 0 1 0 0 1
#4 0 0 1 0 0 1
#$`0 1 1 0 0 1`
# down maybe no other up yes
#5 0 1 1 0 0 1
#6 0 1 1 0 0 1
#7 0 1 1 0 0 1
#$`1 0 1 1 1 0`
# down maybe no other up yes
#2 1 0 1 1 1 0
您可以根据需要使用该列表,即
lapply(l1, rownames)
#$`0 0 1 0 0 1`
#[1] "1" "3" "4"
#$`0 1 1 0 0 1`
#[1] "5" "6" "7"
#$`1 0 1 1 1 0`
#[1] "2"
甚至,
setNames(lapply(l1, rownames), lapply(l1, function(i)toString(names(i)[i[1,] == 1])))
#$`no, yes`
#[1] "1" "3" "4"
#$`maybe, no, yes`
#[1] "5" "6" "7"
#$`down, no, other, up`
#[1] "2"
我有一个向量列表。我想找出所有相同的向量,即那些包含完全相同元素的向量,并保留每个向量在列表中的位置编号(在 R 中)。如果可能,最好用一行命令完成。
这是我的清单:
mylist<-list(c("yes", "no"), c("no", "other", "up",
"down"), c("no", "yes"), c("no",
"yes"), c("no", "yes", "maybe"), c("no",
"yes", "maybe"), c("no", "yes", "maybe"))
期望的输出:
常见的列表是:匹配 1: 1,3,4 匹配 2: 5,6,7
duplicated
接受列表作为其主要参数。所以你可以使用
which(duplicated(mylist1) | duplicated(mylist1, fromLast=TRUE))
[1] 3 4 5 6 7
这适用于您的第一个示例。请注意,这不会区分彼此相同的列表元素分属哪一组,而只会对存在完全相同副本的元素返回 TRUE。
对于第二个示例数据集,您可以使用以下方法查找组的位置
# get group values as integers
# Each element is reduced to a canonical key: its strings are sorted (so the
# order inside a vector does not matter) and joined with a unit-separator
# control character. Joining with "" would let distinct vectors such as
# c("ab", "c") and c("a", "bc") collapse to the same key. vapply() guarantees
# a character vector of keys even when mylist2 is empty.
groups <- as.integer(factor(vapply(
  mylist2,
  function(x) paste(sort(x), collapse = "\x1f"),
  character(1)
)))
# return list of groups
# split() collects the positions belonging to each group code; unname() drops
# the "1", "2", ... labels so the result is a plain list of index vectors.
unname(split(seq_along(mylist2), groups))
[[1]]
[1] 2
[[2]]
[1] 5 6 7
[[3]]
[1] 1 3 4
数据
mylist1 <-
list(c("yes", "no"), c("no", "other", "up", "down"), c("no",
"yes", "maybe"), c("no", "yes", "maybe"), c("no", "yes", "maybe"
), c("no", "yes", "maybe"), c("no", "yes", "maybe"))
mylist2 <-
list(c("yes", "no"), c("no", "other", "up", "down"), c("no",
"yes"), c("no", "yes"), c("no", "yes", "maybe"), c("no", "yes",
"maybe"), c("no", "yes", "maybe"))
这对我有用:
mylist<-list(c("yes", "no"), c("no", "other", "up",
"down"), c("no", "yes"), c("no",
"yes"), c("no", "yes", "maybe"), c("no",
"yes", "maybe"), c("no", "yes", "maybe"))
library(dplyr)
# function to create a dataframe from your list. Might not be the most efficient way to do this.
# Convert a list of vectors into a data frame with one row per list element.
# Vectors shorter than the longest one are right-padded with NA so that every
# row has the same number of columns.
#
# data: a list of atomic vectors.
# Returns a data.frame with length(data) rows and max(lengths(data)) columns;
# an empty list yields an empty data frame.
f <- function(data) {
  # Without this guard max() of an empty vector is -Inf (with a warning) and
  # the matrix() call below fails.
  if (length(data) == 0) {
    return(data.frame())
  }
  n_col <- max(lengths(data))
  padded <- lapply(data, function(v) c(v, rep(NA, n_col - length(v))))
  # byrow = TRUE: each padded vector becomes one row of the matrix.
  cells <- matrix(unlist(padded), nrow = length(data), ncol = n_col, byrow = TRUE)
  data.frame(cells)
}
# Turn the list into a padded data frame, then build one string key per row by
# joining the row's cells with "-".
# NOTE: the cells are joined in their original order, so c("yes", "no") and
# c("no", "yes") receive different keys and end up in different groups.
df <- f(mylist)
df$key <- apply(df, 1, function(cells) paste(cells, collapse = "-"))
# One row per distinct key, together with the number of times it occurs.
df_total <- summarise(group_by(df, key), n = n())
# For every distinct key, collect the row positions that carry that key.
result <- lapply(df_total$key, function(k) which(df$key == k))
结果:
> result
[[1]]
[1] 2
[[2]]
[1] 5 6 7
[[3]]
[1] 3 4
[[4]]
[1] 1
希望对您有所帮助!
这是一个使用split
# Group the positions of mylist by a canonical key (elements sorted, then
# joined with ", ") and keep only the groups that hold more than one position.
# vapply() always yields a character vector of keys, so an empty mylist gives
# an empty result instead of handing split() a list.
Filter(function(idx) length(idx) > 1,
       split(seq_along(mylist),
             vapply(mylist, function(x) toString(sort(x)), character(1))))
#$`maybe, no, yes`
#[1] 5 6 7
#$`no, yes`
#[1] 1 3 4
数据
mylist <- list(c("yes", "no"), c("no", "other", "up", "down"), c("no", "yes"),
c("no", "yes"), c("no", "yes", "maybe"), c("no", "yes", "maybe"),
c("no", "yes", "maybe"))
一个(较长的)单行命令:
# For every distinct key (elements of a vector sorted and joined with " "),
# return the positions in mylist whose key equals it. The key vector is built
# twice: once for unique() and once inside the comparison.
sapply(unique(unlist(lapply(mylist, function(x) paste(sort(x), collapse = " ")))), function(y) which(y == unlist(lapply(mylist, function(x) paste(sort(x), collapse = " ")))))
输出:
$`no yes`
[1] 1 3 4
$`down no other up`
[1] 2
$`maybe no yes`
[1] 5 6 7
这是一个有趣的问题。您可以使用 qdapTools
包中的 mtabulate
来获取以下数据框,
d1 <- qdapTools::mtabulate(mylist)
d1
# down maybe no other up yes
#1 0 0 1 0 0 1
#2 1 0 1 1 1 0
#3 0 0 1 0 0 1
#4 0 0 1 0 0 1
#5 0 1 1 0 0 1
#6 0 1 1 0 0 1
#7 0 1 1 0 0 1
然后可以粘贴拆分,
l1 <- split(d1, do.call(paste, d1))
l1
#$`0 0 1 0 0 1`
# down maybe no other up yes
#1 0 0 1 0 0 1
#3 0 0 1 0 0 1
#4 0 0 1 0 0 1
#$`0 1 1 0 0 1`
# down maybe no other up yes
#5 0 1 1 0 0 1
#6 0 1 1 0 0 1
#7 0 1 1 0 0 1
#$`1 0 1 1 1 0`
# down maybe no other up yes
#2 1 0 1 1 1 0
您可以根据需要使用该列表,即
lapply(l1, rownames)
#$`0 0 1 0 0 1`
#[1] "1" "3" "4"
#$`0 1 1 0 0 1`
#[1] "5" "6" "7"
#$`1 0 1 1 1 0`
#[1] "2"
甚至,
setNames(lapply(l1, rownames), lapply(l1, function(i)toString(names(i)[i[1,] == 1])))
#$`no, yes`
#[1] "1" "3" "4"
#$`maybe, no, yes`
#[1] "5" "6" "7"
#$`down, no, other, up`
#[1] "2"