grepl 和向量列表的子集?
grepl and subset from a list of vectors?
我有一个从下面的代码生成的列表,
# Attach dendextend up front; partition_leaves() below comes from it.
library(dendextend)

# Drop incomplete rows, then centre and scale every column.
df <- scale(na.omit(USArrests))
d <- dist(df, method = "euclidean")

# Hierarchical clustering using complete linkage.
hc1 <- hclust(d, method = "complete")

# Average-linkage clustering, converted to a dendrogram and plotted.
dend15 <- as.dendrogram(hclust(d, method = "average"))
plot(dend15)

# One character vector of leaf labels per entry (see the printed sample).
subtrees <- partition_leaves(dend15)
我想用 grep 按关键字 "Maine" 从这个列表中筛选出一个新的列表。这可能吗?
数据样本:
head ( subtrees, 20 )
[[1]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire" "Vermont"
[6] "South Dakota" "West Virginia" "Delaware" "Rhode Island" "Massachusetts"
[11] "New Jersey" "Arkansas" "Kentucky" "Connecticut" "Minnesota"
[16] "Wisconsin" "Idaho" "Montana" "Nebraska" "Wyoming"
[21] "Virginia" "Oklahoma" "Indiana" "Kansas" "Ohio"
[26] "Pennsylvania" "Hawaii" "Utah" "Oregon" "Washington"
[31] "Alaska" "Georgia" "Tennessee" "Alabama" "Louisiana"
[36] "North Carolina" "Mississippi" "South Carolina" "California" "Nevada"
[41] "Florida" "Colorado" "Missouri" "Texas" "Illinois"
[46] "New York" "Arizona" "Michigan" "Maryland" "New Mexico"
[[2]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire" "Vermont"
[6] "South Dakota" "West Virginia" "Delaware" "Rhode Island" "Massachusetts"
[11] "New Jersey" "Arkansas" "Kentucky" "Connecticut" "Minnesota"
[16] "Wisconsin" "Idaho" "Montana" "Nebraska" "Wyoming"
[21] "Virginia" "Oklahoma" "Indiana" "Kansas" "Ohio"
[26] "Pennsylvania" "Hawaii" "Utah" "Oregon" "Washington"
[[3]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire" "Vermont"
[6] "South Dakota" "West Virginia"
[[4]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire"
[[5]]
[1] "North Dakota"
[[6]]
[1] "Maine" "Iowa" "New Hampshire"
[[7]]
[1] "Maine"
[[8]]
[1] "Iowa" "New Hampshire"
[[9]]
[1] "Iowa"
可以用 lapply 遍历列表,并对每个元素调用 grep:
# For every element of the list, keep only the labels that match "Maine".
lapply(subtrees, function(leaves) grep("Maine", leaves, value = TRUE))
您可能想从结果中删除没有匹配项的空元素(character(0)),这可以使用 Filter 来完成:
# Keep only the elements for which grep() actually returned a match.
Filter(
  function(hits) length(hits) != 0L,
  lapply(subtrees, function(leaves) grep("Maine", leaves, value = TRUE))
)
#[[1]]
#[1] "Maine"
#[[2]]
#[1] "Maine"
#[[3]]
#[1] "Maine"
#[[4]]
#[1] "Maine"
#[[5]]
#[1] "Maine"
#[[6]]
#[1] "Maine"
tidyverse 的写法可以是:
# Same per-element subset, written with purrr and stringr.
purrr::map(
  subtrees,
  function(leaves) stringr::str_subset(leaves, pattern = "Maine")
)
要获取包含匹配项的列表元素的索引,可以结合使用 grepl 和 which:
# Indices of the list elements that contain at least one "Maine" match.
# vapply() pins the result to one logical per element, so an empty list gives
# integer(0) instead of the error which() raises on sapply()'s list() result.
which(vapply(subtrees, function(x) any(grepl("Maine", x)), logical(1)))
#[1] 1 2 3 4 6 7
我有一个从下面的代码生成的列表,
# Attach dendextend up front; partition_leaves() below comes from it.
library(dendextend)

# Drop incomplete rows, then centre and scale every column.
df <- scale(na.omit(USArrests))
d <- dist(df, method = "euclidean")

# Hierarchical clustering using complete linkage.
hc1 <- hclust(d, method = "complete")

# Average-linkage clustering, converted to a dendrogram and plotted.
dend15 <- as.dendrogram(hclust(d, method = "average"))
plot(dend15)

# One character vector of leaf labels per entry (see the printed sample).
subtrees <- partition_leaves(dend15)
我想用 grep 按关键字 "Maine" 从这个列表中筛选出一个新的列表。这可能吗?
数据样本:
head ( subtrees, 20 )
[[1]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire" "Vermont"
[6] "South Dakota" "West Virginia" "Delaware" "Rhode Island" "Massachusetts"
[11] "New Jersey" "Arkansas" "Kentucky" "Connecticut" "Minnesota"
[16] "Wisconsin" "Idaho" "Montana" "Nebraska" "Wyoming"
[21] "Virginia" "Oklahoma" "Indiana" "Kansas" "Ohio"
[26] "Pennsylvania" "Hawaii" "Utah" "Oregon" "Washington"
[31] "Alaska" "Georgia" "Tennessee" "Alabama" "Louisiana"
[36] "North Carolina" "Mississippi" "South Carolina" "California" "Nevada"
[41] "Florida" "Colorado" "Missouri" "Texas" "Illinois"
[46] "New York" "Arizona" "Michigan" "Maryland" "New Mexico"
[[2]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire" "Vermont"
[6] "South Dakota" "West Virginia" "Delaware" "Rhode Island" "Massachusetts"
[11] "New Jersey" "Arkansas" "Kentucky" "Connecticut" "Minnesota"
[16] "Wisconsin" "Idaho" "Montana" "Nebraska" "Wyoming"
[21] "Virginia" "Oklahoma" "Indiana" "Kansas" "Ohio"
[26] "Pennsylvania" "Hawaii" "Utah" "Oregon" "Washington"
[[3]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire" "Vermont"
[6] "South Dakota" "West Virginia"
[[4]]
[1] "North Dakota" "Maine" "Iowa" "New Hampshire"
[[5]]
[1] "North Dakota"
[[6]]
[1] "Maine" "Iowa" "New Hampshire"
[[7]]
[1] "Maine"
[[8]]
[1] "Iowa" "New Hampshire"
[[9]]
[1] "Iowa"
可以用 lapply 遍历列表,并对每个元素调用 grep:
# For every element of the list, keep only the labels that match "Maine".
lapply(subtrees, function(leaves) grep("Maine", leaves, value = TRUE))
您可能想从结果中删除没有匹配项的空元素(character(0)),这可以使用 Filter 来完成:
# Keep only the elements for which grep() actually returned a match.
Filter(
  function(hits) length(hits) != 0L,
  lapply(subtrees, function(leaves) grep("Maine", leaves, value = TRUE))
)
#[[1]]
#[1] "Maine"
#[[2]]
#[1] "Maine"
#[[3]]
#[1] "Maine"
#[[4]]
#[1] "Maine"
#[[5]]
#[1] "Maine"
#[[6]]
#[1] "Maine"
tidyverse 的写法可以是:
# Same per-element subset, written with purrr and stringr.
purrr::map(
  subtrees,
  function(leaves) stringr::str_subset(leaves, pattern = "Maine")
)
要获取包含匹配项的列表元素的索引,可以结合使用 grepl 和 which:
# Indices of the list elements that contain at least one "Maine" match.
# vapply() pins the result to one logical per element, so an empty list gives
# integer(0) instead of the error which() raises on sapply()'s list() result.
which(vapply(subtrees, function(x) any(grepl("Maine", x)), logical(1)))
#[1] 1 2 3 4 6 7