每个列表列中的唯一计数
Count Unique in each list column
我有一个包含由 str_extract_all() 创建的列表列的数据框。我正在尝试识别存在超过 1 个唯一值的情况
#Input =
# List
#1: apple,apple
#2: apple,banana
#3: apple,orange,orange,banana
# Example data: a list column of fruit names, together with the per-row
# distinct count and the "more than one distinct value" flag.
dat <- data.table::data.table(
  List = list(
    c("apple", "apple"),
    c("apple", "banana"),
    c("apple", "orange", "orange", "banana")
  ),
  Count_Unique = 1:3,
  Multi = c(FALSE, TRUE, TRUE)
)
我试过 dplyr::mutate(Count_Unique = length(unique(List))),但这只给出了整个数据集中唯一值的数量。我相信这很简单,只是不知道如何(尽可能使用 tidyverse 方法)按行执行此操作。
#Expected Output =
# List Count_Unique Multi
#1: apple,apple 1 FALSE
#2: apple,banana 2 TRUE
#3: apple,orange,orange,banana 3 TRUE
# Expected result as a data.table: the list column plus the distinct-value
# count of each row and a flag for rows with more than one distinct value.
dat <- data.table::data.table(
  List = list(
    c("apple", "apple"),
    c("apple", "banana"),
    c("apple", "orange", "orange", "banana")
  ),
  Count_Unique = 1:3,
  Multi = c(FALSE, TRUE, TRUE)
)
您可以使用 map_int(计数是整数;也可以用 map_dbl):
library(dplyr)
library(purrr)
# Row-wise: count the distinct values inside each element of the list column,
# then flag rows holding more than one distinct value. map_int() is used
# because n_distinct() returns an integer count.
dat %>%
  mutate(
    Count_Unique = map_int(List, n_distinct),
    Multi = Count_Unique > 1L
  )
# List Count_Unique Multi
#1: apple,apple 1 FALSE
#2: apple,banana 2 TRUE
#3: apple,orange,orange,banana 3 TRUE
使用基础 R :
# Count distinct values per list element; vapply() guarantees an integer
# vector (sapply()'s return type depends on its input), then flag counts > 1.
dat$Multi <- vapply(
  dat$List,
  function(x) length(unique(x)),
  integer(1L)
) > 1L
或在 data.table 中:
library(data.table)
# Update `Multi` by reference: TRUE where a row's list element holds more than
# one distinct value. vapply() pins the per-row count to a single integer.
setDT(dat)[
  ,
  Multi := vapply(List, function(x) length(unique(x)), integer(1L)) > 1L
]
使用data.table
library(data.table)
# Same idea using data.table's uniqueN() for the per-row distinct count.
# The target column is `Multi` (capitalised) so the existing column is
# updated instead of a second, lowercase `multi` column being added.
setDT(dat)[, Multi := vapply(List, uniqueN, integer(1L)) > 1L]
我有一个包含由 str_extract_all() 创建的列表列的数据框。我正在尝试识别存在超过 1 个唯一值的情况
#Input =
# List
#1: apple,apple
#2: apple,banana
#3: apple,orange,orange,banana
# Example data: a list column of fruit names, together with the per-row
# distinct count and the "more than one distinct value" flag.
dat <- data.table::data.table(
  List = list(
    c("apple", "apple"),
    c("apple", "banana"),
    c("apple", "orange", "orange", "banana")
  ),
  Count_Unique = 1:3,
  Multi = c(FALSE, TRUE, TRUE)
)
我试过 dplyr::mutate(Count_Unique = length(unique(List))),但这只给出了整个数据集中唯一值的数量。我相信这很简单,只是不知道如何(尽可能使用 tidyverse 方法)按行执行此操作。
#Expected Output =
# List Count_Unique Multi
#1: apple,apple 1 FALSE
#2: apple,banana 2 TRUE
#3: apple,orange,orange,banana 3 TRUE
# Expected result as a data.table: the list column plus the distinct-value
# count of each row and a flag for rows with more than one distinct value.
dat <- data.table::data.table(
  List = list(
    c("apple", "apple"),
    c("apple", "banana"),
    c("apple", "orange", "orange", "banana")
  ),
  Count_Unique = 1:3,
  Multi = c(FALSE, TRUE, TRUE)
)
您可以使用 map_int(计数是整数;也可以用 map_dbl):
library(dplyr)
library(purrr)
# Row-wise: count the distinct values inside each element of the list column,
# then flag rows holding more than one distinct value. map_int() is used
# because n_distinct() returns an integer count.
dat %>%
  mutate(
    Count_Unique = map_int(List, n_distinct),
    Multi = Count_Unique > 1L
  )
# List Count_Unique Multi
#1: apple,apple 1 FALSE
#2: apple,banana 2 TRUE
#3: apple,orange,orange,banana 3 TRUE
使用基础 R :
# Count distinct values per list element; vapply() guarantees an integer
# vector (sapply()'s return type depends on its input), then flag counts > 1.
dat$Multi <- vapply(
  dat$List,
  function(x) length(unique(x)),
  integer(1L)
) > 1L
或在 data.table 中:
library(data.table)
# Update `Multi` by reference: TRUE where a row's list element holds more than
# one distinct value. vapply() pins the per-row count to a single integer.
setDT(dat)[
  ,
  Multi := vapply(List, function(x) length(unique(x)), integer(1L)) > 1L
]
使用data.table
library(data.table)
# Same idea using data.table's uniqueN() for the per-row distinct count.
# The target column is `Multi` (capitalised) so the existing column is
# updated instead of a second, lowercase `multi` column being added.
setDT(dat)[, Multi := vapply(List, uniqueN, integer(1L)) > 1L]