在 R 中,创建一列用引号括起来并用逗号分隔的标签名称
In R, creating a column of tag names that are surrounded by quotes and separated by commas
在 R 中,我正在尝试为 tibble 创建一个列,其中将包含一串标签,这些标签将用于在 MailChimp 中分割电子邮件列表。
我将上传到 MailChimp 的输出 CSV 中,标签列的格式应该是这样一个字符串(使用英文直引号,并以逗号加空格分隔):"属性 1", "属性 2"
我怀疑这样做会涉及使用 if 语句创建函数,并根据 TRUE/FALSE 逻辑添加标签。
数据结构如下所示:
library(tidyverse)

# Example data: one row per person and one logical membership flag per
# segment (A, B, C).
person <- c("Person 1", "Person 2", "Person 3", "Person 4")
is_in_A <- c(TRUE, FALSE, TRUE, FALSE)
is_in_B <- c(TRUE, FALSE, FALSE, TRUE)
is_in_C <- c(TRUE, TRUE, TRUE, TRUE)

# Every column is named explicitly so the resulting column names do not
# depend on tibble()'s automatic naming of unnamed arguments.
example_tbl <- tibble(
  person = person,
  is_in_A = is_in_A,
  is_in_B = is_in_B,
  is_in_C = is_in_C
)

# This is what the data looks like:
example_tbl
# person is_in_A is_in_B is_in_C
# <chr> <lgl> <lgl> <lgl>
# Person 1 TRUE TRUE TRUE
# Person 2 FALSE FALSE TRUE
# Person 3 TRUE FALSE TRUE
# Person 4 FALSE TRUE TRUE
输出应该是这样的:
# Hand-written target values, one string per row of example_tbl: the names of
# the TRUE flags, each wrapped in straight double quotes and joined by ", ".
tags_1 <- '"is in A", "is in B", "is in C"'
tags_2 <- '"is in C"'
tags_3 <- '"is in A", "is in C"'
tags_4 <- '"is in B", "is in C"'
all_tags <- c(tags_1, tags_2, tags_3, tags_4)

# Attach the target strings to the example data as a new `tags` column.
example_output_tbl <- add_column(example_tbl, tags = all_tags)

# Desired output:
example_output_tbl
# person is_in_A is_in_B is_in_C tags
# <chr> <lgl> <lgl> <lgl> <chr>
# Person 1 TRUE TRUE TRUE "is in A", "is in B", "is in C"
# Person 2 FALSE FALSE TRUE "is in C"
# Person 3 TRUE FALSE TRUE "is in A", "is in C"
# Person 4 FALSE TRUE TRUE "is in B", "is in C"
欢迎提出任何建议。
我们可以先将数据重塑为 'long' 格式,再用 'value' 列作为逻辑索引取出 'name' 中对应的元素并拼接(paste)成一个字符串,
然后与原始数据集连接
library(dplyr)
library(tidyr)
library(stringr)
# Build one tag string per person, then attach it to the original columns.
# 1. pivot_longer(): one row per (person, flag) pair, in columns `name` and
#    `value`.
# 2. summarise(): keep the names whose flag is TRUE, wrap each in straight
#    double quotes (dQuote(q = FALSE)) and collapse them with ", ".
# 3. left_join(): bring the original flag columns back. The key is given
#    explicitly so the join does not rely on a guessed key.
# 4. select(): restore the original column order, with `tag` last.
example_tbl %>%
  pivot_longer(cols = -person) %>%
  group_by(person) %>%
  summarise(tag = str_c(dQuote(name[value], q = FALSE), collapse = ", ")) %>%
  left_join(example_tbl, by = "person") %>%
  select(all_of(names(example_tbl)), everything())
# A tibble: 4 x 5
# person is_in_A is_in_B is_in_C tag
# <chr> <lgl> <lgl> <lgl> <chr>
#1 Person 1 TRUE TRUE TRUE "\"is_in_A\", \"is_in_B\", \"is_in_C\""
#2 Person 2 FALSE FALSE TRUE "\"is_in_C\""
#3 Person 3 TRUE FALSE TRUE "\"is_in_A\", \"is_in_C\""
#4 Person 4 FALSE TRUE TRUE "\"is_in_B\", \"is_in_C\""
下面这个 base R 的做法
也可以解决问题:
# For each row, collect the names of the columns (all but the first, `person`)
# whose value is TRUE and join them with ",". TRUE is spelled out because `T`
# is an ordinary variable that can be reassigned.
example_tbl$Col <- apply(
  example_tbl[, -1],
  1,
  function(x) paste(names(example_tbl)[-1][which(x == TRUE)], collapse = ",")
)
# A tibble: 4 x 5
person is_in_A is_in_B is_in_C Col
<chr> <lgl> <lgl> <lgl> <chr>
1 Person 1 TRUE TRUE TRUE is_in_A,is_in_B,is_in_C
2 Person 2 FALSE FALSE TRUE is_in_C
3 Person 3 TRUE FALSE TRUE is_in_A,is_in_C
4 Person 4 FALSE TRUE TRUE is_in_B,is_in_C
将 rowwise
与 c_across
一起使用:
library(dplyr)
# Names of the flag columns, selected by prefix so that any other column
# present in example_tbl is ignored.
cols <- grep("^is_in", names(example_tbl), value = TRUE)
example_tbl %>%
  rowwise() %>%
  # c_across() returns the current row's flags as one logical vector, which
  # indexes `cols`. q = FALSE makes dQuote() emit plain ASCII double quotes
  # instead of locale-dependent curly quotes.
  mutate(tags = toString(dQuote(cols[c_across(all_of(cols))], q = FALSE))) %>%
  # Drop the row-wise grouping so later verbs work on the whole table again.
  ungroup()
# The quotes are shown escaped when printed; the stored value of the first
# row is: "is_in_A", "is_in_B", "is_in_C"
# person is_in_A is_in_B is_in_C tags
# <chr> <lgl> <lgl> <lgl> <chr>
#1 Person 1 TRUE TRUE TRUE "\"is_in_A\", \"is_in_B\", \"is_in_C\""
#2 Person 2 FALSE FALSE TRUE "\"is_in_C\""
#3 Person 3 TRUE FALSE TRUE "\"is_in_A\", \"is_in_C\""
#4 Person 4 FALSE TRUE TRUE "\"is_in_B\", \"is_in_C\""
或者使用 purrr 中的 pmap_chr:
# Alternative without rowwise(): pmap_chr() calls the function once per row
# with the flag columns as named arguments, so c(...) is a named logical
# vector whose names are the tag names. Each selected name is wrapped in
# straight double quotes (q = FALSE) before being joined with ", ".
example_tbl %>%
  mutate(tags = purrr::pmap_chr(
    select(., starts_with("is_in")),
    function(...) {
      flags <- c(...)
      toString(dQuote(names(flags)[flags], q = FALSE))
    }
  ))
在 R 中,我正在尝试为 tibble 创建一个列,其中将包含一串标签,这些标签将用于在 MailChimp 中分割电子邮件列表。
我将上传到 MailChimp 的输出 CSV 中,标签列的格式应该是这样一个字符串(使用英文直引号,并以逗号加空格分隔):"属性 1", "属性 2"
我怀疑这样做会涉及使用 if 语句创建函数,并根据 TRUE/FALSE 逻辑添加标签。
数据结构如下所示:
library(tidyverse)

# Example data: one row per person and one logical membership flag per
# segment (A, B, C).
person <- c("Person 1", "Person 2", "Person 3", "Person 4")
is_in_A <- c(TRUE, FALSE, TRUE, FALSE)
is_in_B <- c(TRUE, FALSE, FALSE, TRUE)
is_in_C <- c(TRUE, TRUE, TRUE, TRUE)

# Every column is named explicitly so the resulting column names do not
# depend on tibble()'s automatic naming of unnamed arguments.
example_tbl <- tibble(
  person = person,
  is_in_A = is_in_A,
  is_in_B = is_in_B,
  is_in_C = is_in_C
)

# This is what the data looks like:
example_tbl
# person is_in_A is_in_B is_in_C
# <chr> <lgl> <lgl> <lgl>
# Person 1 TRUE TRUE TRUE
# Person 2 FALSE FALSE TRUE
# Person 3 TRUE FALSE TRUE
# Person 4 FALSE TRUE TRUE
输出应该是这样的:
# Hand-written target values, one string per row of example_tbl: the names of
# the TRUE flags, each wrapped in straight double quotes and joined by ", ".
tags_1 <- '"is in A", "is in B", "is in C"'
tags_2 <- '"is in C"'
tags_3 <- '"is in A", "is in C"'
tags_4 <- '"is in B", "is in C"'
all_tags <- c(tags_1, tags_2, tags_3, tags_4)

# Attach the target strings to the example data as a new `tags` column.
example_output_tbl <- add_column(example_tbl, tags = all_tags)

# Desired output:
example_output_tbl
# person is_in_A is_in_B is_in_C tags
# <chr> <lgl> <lgl> <lgl> <chr>
# Person 1 TRUE TRUE TRUE "is in A", "is in B", "is in C"
# Person 2 FALSE FALSE TRUE "is in C"
# Person 3 TRUE FALSE TRUE "is in A", "is in C"
# Person 4 FALSE TRUE TRUE "is in B", "is in C"
欢迎提出任何建议。
我们可以先将数据重塑为 'long' 格式,再用 'value' 列作为逻辑索引取出 'name' 中对应的元素并拼接(paste)成一个字符串,
然后与原始数据集连接
library(dplyr)
library(tidyr)
library(stringr)
# Build one tag string per person, then attach it to the original columns.
# 1. pivot_longer(): one row per (person, flag) pair, in columns `name` and
#    `value`.
# 2. summarise(): keep the names whose flag is TRUE, wrap each in straight
#    double quotes (dQuote(q = FALSE)) and collapse them with ", ".
# 3. left_join(): bring the original flag columns back. The key is given
#    explicitly so the join does not rely on a guessed key.
# 4. select(): restore the original column order, with `tag` last.
example_tbl %>%
  pivot_longer(cols = -person) %>%
  group_by(person) %>%
  summarise(tag = str_c(dQuote(name[value], q = FALSE), collapse = ", ")) %>%
  left_join(example_tbl, by = "person") %>%
  select(all_of(names(example_tbl)), everything())
# A tibble: 4 x 5
# person is_in_A is_in_B is_in_C tag
# <chr> <lgl> <lgl> <lgl> <chr>
#1 Person 1 TRUE TRUE TRUE "\"is_in_A\", \"is_in_B\", \"is_in_C\""
#2 Person 2 FALSE FALSE TRUE "\"is_in_C\""
#3 Person 3 TRUE FALSE TRUE "\"is_in_A\", \"is_in_C\""
#4 Person 4 FALSE TRUE TRUE "\"is_in_B\", \"is_in_C\""
下面这个 base R 的做法
也可以解决问题:
# For each row, collect the names of the columns (all but the first, `person`)
# whose value is TRUE and join them with ",". TRUE is spelled out because `T`
# is an ordinary variable that can be reassigned.
example_tbl$Col <- apply(
  example_tbl[, -1],
  1,
  function(x) paste(names(example_tbl)[-1][which(x == TRUE)], collapse = ",")
)
# A tibble: 4 x 5
person is_in_A is_in_B is_in_C Col
<chr> <lgl> <lgl> <lgl> <chr>
1 Person 1 TRUE TRUE TRUE is_in_A,is_in_B,is_in_C
2 Person 2 FALSE FALSE TRUE is_in_C
3 Person 3 TRUE FALSE TRUE is_in_A,is_in_C
4 Person 4 FALSE TRUE TRUE is_in_B,is_in_C
将 rowwise
与 c_across
一起使用:
library(dplyr)
# Names of the flag columns, selected by prefix so that any other column
# present in example_tbl is ignored.
cols <- grep("^is_in", names(example_tbl), value = TRUE)
example_tbl %>%
  rowwise() %>%
  # c_across() returns the current row's flags as one logical vector, which
  # indexes `cols`. q = FALSE makes dQuote() emit plain ASCII double quotes
  # instead of locale-dependent curly quotes.
  mutate(tags = toString(dQuote(cols[c_across(all_of(cols))], q = FALSE))) %>%
  # Drop the row-wise grouping so later verbs work on the whole table again.
  ungroup()
# The quotes are shown escaped when printed; the stored value of the first
# row is: "is_in_A", "is_in_B", "is_in_C"
# person is_in_A is_in_B is_in_C tags
# <chr> <lgl> <lgl> <lgl> <chr>
#1 Person 1 TRUE TRUE TRUE "\"is_in_A\", \"is_in_B\", \"is_in_C\""
#2 Person 2 FALSE FALSE TRUE "\"is_in_C\""
#3 Person 3 TRUE FALSE TRUE "\"is_in_A\", \"is_in_C\""
#4 Person 4 FALSE TRUE TRUE "\"is_in_B\", \"is_in_C\""
或者使用 purrr 中的 pmap_chr:
# Alternative without rowwise(): pmap_chr() calls the function once per row
# with the flag columns as named arguments, so c(...) is a named logical
# vector whose names are the tag names. Each selected name is wrapped in
# straight double quotes (q = FALSE) before being joined with ", ".
example_tbl %>%
  mutate(tags = purrr::pmap_chr(
    select(., starts_with("is_in")),
    function(...) {
      flags <- c(...)
      toString(dQuote(names(flags)[flags], q = FALSE))
    }
  ))