使用 enquo() 以 dplyr 语法编写可管道函数,不返回预期输出
Writing pipeable function in dplyr syntax, using enquo(), not returning intended output
library(tidyverse)
library(stringr)
library(janitor)
# The question's attempt at a pipeable word-frequency function. It does NOT
# behave as intended; it is kept exactly as posted to illustrate the problem.
#   data:     a data frame / tibble
#   char_col: unquoted name of the text column
word_count <- function(data, char_col) {
# Capture the caller's column expression as a quosure.
char_col <- enquo(char_col)
data %>%
select(!!char_col) %>%
# NOTE: on the left-hand side of `=`, `char_col` is taken literally, so the two
# mutate() calls below write to a new column named "char_col". The column the
# caller passed is never overwritten.
mutate(char_col = str_remove_all(!!char_col, '[[:punct:]]')) %>%
# The right-hand side re-reads the caller's original column, so the
# punctuation-stripped value from the previous step is discarded.
mutate(char_col = str_split(!!char_col, ' ')) %>%
separate(char_col, into = paste0('col', 1:30), fill = 'right') %>%
select(-col1) %>%
# gather() stacks every remaining column, including the untouched original
# column, so its unsplit strings are counted next to the individual words.
gather(value = word) %>%
select(word) %>%
remove_empty(c('rows')) %>%
filter(word != '') %>%
mutate(word = str_to_lower(word)) %>%
group_by(word) %>%
summarize(freq = n()) %>%
arrange(desc(freq))
}
# Demo call: append " species" to every Species value, then count the words.
iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  word_count(Species)
此代码在函数外部按预期工作,但当我在函数内部使用它时,它将 return 每个单词和每个 'non-split' 字符串的频率。
我认为这是我放置“!!”的方式的问题运营商,但我无法通过与他们一起试错来解决这个问题。这也可能是一个懒惰的问题,我不确定如何解决。
我希望函数的输出与下面代码的输出匹配。
# Hand-written pipeline that produces the output the function should return.
iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  select(Species) %>%
  # Strip punctuation, then split each string on single spaces.
  mutate(Species = str_remove_all(Species, '[[:punct:]]')) %>%
  mutate(Species = str_split(Species, ' ')) %>%
  # Spread the pieces over up to 30 columns, padding short rows on the right.
  separate(Species, into = paste0('col', 1:30), fill = 'right') %>%
  select(-col1) %>%
  # Stack all remaining columns into a single `word` column.
  gather(value = word) %>%
  select(word) %>%
  remove_empty(c('rows')) %>%
  filter(word != '') %>%
  mutate(word = str_to_lower(word)) %>%
  # Tally each word and list the most frequent first.
  group_by(word) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq))
我们不能将 char_col
对象作为列名分配给 mutate
。它需要被评估。 char_col
是一个 quosure
对象,可以将其转换为 character
(quo_name(char_col)
) 或 symbol
,在评估时 (!!
) 将分配 ( :=
) 正确的列名
#' Count word frequencies in a character column
#'
#' Strips punctuation from `char_col`, splits each value on single spaces,
#' lower-cases the pieces and tallies how often each word occurs.
#'
#' @param data A data frame or tibble.
#' @param char_col Unquoted name of the character column to analyse.
#' @return A data frame with columns `word` and `freq`, sorted by descending
#'   `freq`.
word_count <- function(data, char_col) {
  # Capture the caller's column expression as a quosure.
  char_col <- enquo(char_col)
  # String form of the column name, needed on the left-hand side of `:=`.
  char_colC <- quo_name(char_col)
  data %>%
    select(!!char_col) %>%
    # `!!name := value` overwrites the caller's column instead of creating a
    # column that is literally called "char_col".
    mutate(!!char_colC := str_remove_all(!!char_col, '[[:punct:]]')) %>%
    mutate(!!char_colC := str_split(!!char_col, ' ')) %>%
    # Unquote the quosure here rather than passing the bare name `char_colC`:
    # a bare external character variable in a selection is ambiguous and would
    # pick the wrong column if the data had a column named "char_colC".
    # At most 30 pieces per row are kept; shorter rows are padded on the right.
    separate(!!char_col, into = paste0('col', 1:30), fill = 'right') %>%
    # NOTE(review): col1 presumably holds the literal "c" left over from
    # coercing the list column produced by str_split() to text -- confirm.
    select(-col1) %>%
    # Stack all remaining columns into a single `word` column.
    gather(value = word) %>%
    select(word) %>%
    remove_empty(c('rows')) %>%
    filter(word != '') %>%
    mutate(word = str_to_lower(word)) %>%
    group_by(word) %>%
    summarize(freq = n()) %>%
    arrange(desc(freq))
}
# Result obtained through the function.
out2 <- iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  word_count(Species)
-不使用函数
检查输出
# Reference result: the same steps written out without the function.
out1 <- iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  select(Species) %>%
  mutate(Species = str_remove_all(Species, '[[:punct:]]')) %>%
  mutate(Species = str_split(Species, ' ')) %>%
  separate(Species, into = paste0('col', 1:30), fill = 'right') %>%
  select(-col1) %>%
  gather(value = word) %>%
  select(word) %>%
  remove_empty(c('rows')) %>%
  filter(word != '') %>%
  mutate(word = str_to_lower(word)) %>%
  group_by(word) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq))
# The function's result must match the hand-written pipeline exactly.
identical(out1, out2)
#[1] TRUE
library(tidyverse)
library(stringr)
library(janitor)
# The question's attempt at a pipeable word-frequency function. It does NOT
# behave as intended; it is kept exactly as posted to illustrate the problem.
#   data:     a data frame / tibble
#   char_col: unquoted name of the text column
word_count <- function(data, char_col) {
# Capture the caller's column expression as a quosure.
char_col <- enquo(char_col)
data %>%
select(!!char_col) %>%
# NOTE: on the left-hand side of `=`, `char_col` is taken literally, so the two
# mutate() calls below write to a new column named "char_col". The column the
# caller passed is never overwritten.
mutate(char_col = str_remove_all(!!char_col, '[[:punct:]]')) %>%
# The right-hand side re-reads the caller's original column, so the
# punctuation-stripped value from the previous step is discarded.
mutate(char_col = str_split(!!char_col, ' ')) %>%
separate(char_col, into = paste0('col', 1:30), fill = 'right') %>%
select(-col1) %>%
# gather() stacks every remaining column, including the untouched original
# column, so its unsplit strings are counted next to the individual words.
gather(value = word) %>%
select(word) %>%
remove_empty(c('rows')) %>%
filter(word != '') %>%
mutate(word = str_to_lower(word)) %>%
group_by(word) %>%
summarize(freq = n()) %>%
arrange(desc(freq))
}
# Demo call: append " species" to every Species value, then count the words.
iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  word_count(Species)
此代码在函数外部按预期工作,但当我在函数内部使用它时,它将 return 每个单词和每个 'non-split' 字符串的频率。
我认为这是我放置“!!”的方式的问题运营商,但我无法通过与他们一起试错来解决这个问题。这也可能是一个懒惰的问题,我不确定如何解决。
我希望函数的输出与下面代码的输出匹配。
# Hand-written pipeline that produces the output the function should return.
iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  select(Species) %>%
  # Strip punctuation, then split each string on single spaces.
  mutate(Species = str_remove_all(Species, '[[:punct:]]')) %>%
  mutate(Species = str_split(Species, ' ')) %>%
  # Spread the pieces over up to 30 columns, padding short rows on the right.
  separate(Species, into = paste0('col', 1:30), fill = 'right') %>%
  select(-col1) %>%
  # Stack all remaining columns into a single `word` column.
  gather(value = word) %>%
  select(word) %>%
  remove_empty(c('rows')) %>%
  filter(word != '') %>%
  mutate(word = str_to_lower(word)) %>%
  # Tally each word and list the most frequent first.
  group_by(word) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq))
我们不能将 char_col
对象作为列名分配给 mutate
。它需要被评估。 char_col
是一个 quosure
对象,可以将其转换为 character
(quo_name(char_col)
) 或 symbol
,在评估时 (!!
) 将分配 ( :=
) 正确的列名
#' Count word frequencies in a character column
#'
#' Strips punctuation from `char_col`, splits each value on single spaces,
#' lower-cases the pieces and tallies how often each word occurs.
#'
#' @param data A data frame or tibble.
#' @param char_col Unquoted name of the character column to analyse.
#' @return A data frame with columns `word` and `freq`, sorted by descending
#'   `freq`.
word_count <- function(data, char_col) {
  # Capture the caller's column expression as a quosure.
  char_col <- enquo(char_col)
  # String form of the column name, needed on the left-hand side of `:=`.
  char_colC <- quo_name(char_col)
  data %>%
    select(!!char_col) %>%
    # `!!name := value` overwrites the caller's column instead of creating a
    # column that is literally called "char_col".
    mutate(!!char_colC := str_remove_all(!!char_col, '[[:punct:]]')) %>%
    mutate(!!char_colC := str_split(!!char_col, ' ')) %>%
    # Unquote the quosure here rather than passing the bare name `char_colC`:
    # a bare external character variable in a selection is ambiguous and would
    # pick the wrong column if the data had a column named "char_colC".
    # At most 30 pieces per row are kept; shorter rows are padded on the right.
    separate(!!char_col, into = paste0('col', 1:30), fill = 'right') %>%
    # NOTE(review): col1 presumably holds the literal "c" left over from
    # coercing the list column produced by str_split() to text -- confirm.
    select(-col1) %>%
    # Stack all remaining columns into a single `word` column.
    gather(value = word) %>%
    select(word) %>%
    remove_empty(c('rows')) %>%
    filter(word != '') %>%
    mutate(word = str_to_lower(word)) %>%
    group_by(word) %>%
    summarize(freq = n()) %>%
    arrange(desc(freq))
}
# Result obtained through the function.
out2 <- iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  word_count(Species)
-不使用函数
# Check the output: reference result written out without the function.
# (The label text was fused onto the assignment, which made R assign to a
# different variable name than `out1`; it is now a comment.)
out1 <- iris %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is the replacement
  mutate(Species = str_c(Species, ' species')) %>%
  select(Species) %>%
  mutate(Species = str_remove_all(Species, '[[:punct:]]')) %>%
  mutate(Species = str_split(Species, ' ')) %>%
  separate(Species, into = paste0('col', 1:30), fill = 'right') %>%
  select(-col1) %>%
  gather(value = word) %>%
  select(word) %>%
  remove_empty(c('rows')) %>%
  filter(word != '') %>%
  mutate(word = str_to_lower(word)) %>%
  group_by(word) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq))
# The function's result must match the hand-written pipeline exactly.
identical(out1, out2)
#[1] TRUE