在 R 中,创建一列用引号括起来并用逗号分隔的标签名称

In R, creating a column of tag names that are surrounded by quotes and separated by commas

在 R 中,我正在尝试为 tibble 创建一个列,其中将包含一串标签,这些标签将用于在 MailChimp 中分割电子邮件列表。

在我将上传到 MailChimp 的输出 CSV 中,标签列的格式应该是如下形式的字符串:"属性 1", "属性 2"(每个标签用直双引号括起来,标签之间用逗号分隔)。

我怀疑这样做会涉及使用 if 语句创建函数,并根据 TRUE/FALSE 逻辑添加标签。

数据结构如下所示:

library(tidyverse)

# Example data: one row per person plus one logical flag per segment that says
# whether the person belongs to that segment.
person <- c("Person 1", "Person 2", "Person 3", "Person 4")
is_in_A <- c(TRUE, FALSE, TRUE, FALSE)
is_in_B <- c(TRUE, FALSE, FALSE, TRUE)
is_in_C <- rep(TRUE, 4)

# Unnamed arguments are auto-named after the variables passed in, so the
# columns are person, is_in_A, is_in_B and is_in_C.
example_tbl <- tibble(person, is_in_A, is_in_B, is_in_C)

# this is what the data looks like:
example_tbl

# person   is_in_A is_in_B is_in_C  
# <chr>     <lgl>   <lgl>   <lgl>   
# Person 1  TRUE    TRUE    TRUE    
# Person 2  FALSE   FALSE   TRUE    
# Person 3  TRUE    FALSE   TRUE    
# Person 4  FALSE   TRUE    TRUE    

输出应该是这样的:

# Expected tag string for each person, in row order. Every tag is wrapped in
# straight double quotes and tags are separated by ", ".
tags_1 <- '"is in A", "is in B", "is in C"'
tags_2 <- '"is in C"'
tags_3 <- '"is in A", "is in C"'
tags_4 <- '"is in B", "is in C"'
all_tags <- c(tags_1, tags_2, tags_3, tags_4)

# Append the expected tags to the example data as a new `tags` column.
example_output_tbl <- add_column(example_tbl, tags = all_tags)

# this is what the output should look like
example_output_tbl

# person   is_in_A is_in_B is_in_C  tags
# <chr>     <lgl>   <lgl>   <lgl>   <chr>
# Person 1  TRUE    TRUE    TRUE    "is in A", "is in B", "is in C"
# Person 2  FALSE   FALSE   TRUE    "is in C"
# Person 3  TRUE    FALSE   TRUE    "is in A", "is in C"
# Person 4  FALSE   TRUE    TRUE    "is in B", "is in C"

欢迎提出任何建议。

我们可以先把数据重塑为 'long' 格式,再用 'value' 列对 'name' 列的元素进行索引(只保留值为 TRUE 的名称),把它们 paste 成一个字符串,最后与原始数据集连接

library(dplyr)
library(tidyr)
library(stringr)
# Build the tag string per person by reshaping to long format:
#   1. pivot_longer() stacks every column except `person` into `name`
#      (the column name) / `value` (TRUE or FALSE) pairs.
#   2. Within each person, name[value] keeps only the names whose flag is
#      TRUE; each is wrapped in straight double quotes (q = FALSE turns off
#      the option-dependent curly quotes) and they are joined with ", ".
#   3. The per-person summary is joined back onto the original columns.
example_tbl %>%
    pivot_longer(cols = -person) %>%
    group_by(person) %>%
    summarise(tag = str_c(dQuote(name[value], q = FALSE), collapse = ", ")) %>%
    # Name the join key explicitly: this avoids the "Joining with `by = ...`"
    # message and prevents an accidental join on any other shared column.
    left_join(example_tbl, by = "person") %>%
    # Original columns first (all_of() marks the external name vector
    # explicitly), then the new `tag` column.
    select(all_of(names(example_tbl)), everything())
# A tibble: 4 x 5
#  person   is_in_A is_in_B is_in_C tag                                    
#  <chr>    <lgl>   <lgl>   <lgl>   <chr>                                  
#1 Person 1 TRUE    TRUE    TRUE    "\"is_in_A\", \"is_in_B\", \"is_in_C\""
#2 Person 2 FALSE   FALSE   TRUE    "\"is_in_C\""                          
#3 Person 3 TRUE    FALSE   TRUE    "\"is_in_A\", \"is_in_C\""             
#4 Person 4 FALSE   TRUE    TRUE    "\"is_in_B\", \"is_in_C\""      

也可以使用 base R 来实现:

# Names of the flag columns (everything except the first column, `person`),
# computed once instead of on every row.
tag_names <- names(example_tbl)[-1]

# For each row, keep the names of the columns whose value is TRUE and join
# them with commas (no quoting, no space after the comma). apply() converts
# the flag columns to a matrix and passes each row to the function as `x`.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
example_tbl$Col <- apply(
  example_tbl[, -1],
  1,
  function(x) paste(tag_names[which(x == TRUE)], collapse = ",")
)

# A tibble: 4 x 5
  person   is_in_A is_in_B is_in_C Col                    
  <chr>    <lgl>   <lgl>   <lgl>   <chr>                  
1 Person 1 TRUE    TRUE    TRUE    is_in_A,is_in_B,is_in_C
2 Person 2 FALSE   FALSE   TRUE    is_in_C                
3 Person 3 TRUE    FALSE   TRUE    is_in_A,is_in_C        
4 Person 4 FALSE   TRUE    TRUE    is_in_B,is_in_C  

将 rowwise 与 c_across 一起使用:

library(dplyr)
# Names of the flag columns, taken with the same selector that c_across() uses
# below, so the names and the per-row flags always line up even if example_tbl
# has gained other columns in the meantime.
cols <- example_tbl %>%
  select(starts_with("is_in")) %>%
  names()

example_tbl %>%
  rowwise() %>%
  # c_across() returns the current row's flags as one logical vector; indexing
  # `cols` with it keeps the names of the TRUE flags. q = FALSE forces straight
  # double quotes instead of relying on the session's useFancyQuotes option,
  # and toString() joins the quoted names with ", ".
  mutate(
    tags = toString(
      dQuote(cols[c_across(starts_with("is_in"))], q = FALSE)
    )
  ) %>%
  # Drop the rowwise grouping so later verbs work on the whole table again.
  ungroup()

#  person   is_in_A is_in_B is_in_C tags                                   
#  <chr>    <lgl>   <lgl>   <lgl>   <chr>                                  
#1 Person 1 TRUE    TRUE    TRUE    "\"is_in_A\", \"is_in_B\", \"is_in_C\""
#2 Person 2 FALSE   FALSE   TRUE    "\"is_in_C\""                          
#3 Person 3 TRUE    FALSE   TRUE    "\"is_in_A\", \"is_in_C\""             
#4 Person 4 FALSE   TRUE    TRUE    "\"is_in_B\", \"is_in_C\""             

也可以使用 purrr 中的 pmap_chr:

# Same idea with purrr::pmap_chr(): the selected flag columns are processed
# row by row, and each row's values arrive in `...` as arguments named after
# their columns, so no separately defined vector of column names is needed.
example_tbl %>%
    mutate(tags = purrr::pmap_chr(
        select(., starts_with("is_in")),
        function(...) {
            # Named logical vector holding this row's flags.
            flags <- c(...)
            # Keep the names of the TRUE flags, wrap each in straight double
            # quotes (q = FALSE) and join them with ", ".
            toString(dQuote(names(flags)[flags], q = FALSE))
        }
    ))