bind_tf_idf 在 R 中的用法

Usage of bind_tf_idf in R

    library(janeaustenr)
    library(tidytext)
    library(tidyverse)
    library(tm)
    library(corpus)

   # Strip digits from the novel's lines and hold them in a one-column data frame.
text <- data.frame(text = removeNumbers(sensesensibility))

# Split every line into overlapping two-word tokens (bigrams).
tidy_text <- unnest_tokens(text, bigram, text, token = "ngrams", n = 2)

# Print the bigrams ordered from most to least frequent.
count(tidy_text, bigram, sort = TRUE)

# Break each bigram into its two words so each one can be checked separately.
tidy_text <- separate(tidy_text, bigram, into = c("word1", "word2"), sep = " ")

# Keep only the bigrams in which neither word is a stop word.
tidy_text_filtered <- filter(
  tidy_text,
  !word1 %in% stop_words$word,
  !word2 %in% stop_words$word
)

# Count how often each remaining word pair occurs.
trigram_count <- count(tidy_text_filtered, word1, word2, sort = TRUE)

# Join the two words back into one column and keep pairs seen more than once.
united <- trigram_count %>%
  unite(bigram, word1, word2, sep = " ") %>%
  filter(n > 1)

# This is the call the question is about: only the term and count columns are
# passed, no document column, and it is reported to fail inside tapply().
united <- bind_tf_idf(united, bigram, n)

但是我收到此错误:“tapply(n, documents, sum) 出错:参数必须具有相同的长度”

我对 bind_tf_idf 的使用可能有什么问题?

bind_tf_idf 需要三个参数:'term'、'document' 和 'n'。问题中的调用只传入了 bigram 和 n,缺少 'document' 参数,因此会报错。我们可以先创建一个 'document' 列:

# Tag every row with one constant document id (`book`), then compute tf-idf
# with the term, document and count columns all named explicitly.
out <- bind_tf_idf(
  mutate(united, book = "sensesensibility"),
  term = bigram,
  document = book,
  n = n
)

如果需要按章节('chapter')划分文档,可以在原始的 'text' data.frame 中检测单词 'chapter' 来生成章节编号:
library(stringr)

# Give every line a running chapter number: the counter goes up by one each
# time a line contains the word "chapter" (matched case-insensitively).
text <- text %>%
  mutate(
    chapter = cumsum(str_detect(text, regex("chapter", ignore_case = TRUE)))
  )

# Tokenize into bigrams again so that each bigram row carries its `chapter`.
# NOTE(review): recent tidytext releases appear to return an NA bigram for
# lines with fewer than two words; dropping them keeps a bogus "NA NA" term
# out of the counts and is a no-op where no NA is produced.
tidy_text <- text %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  filter(!is.na(bigram))

# Same steps as before, spelled out because they must be re-run on the
# chapter-aware table: split each bigram into words and drop stop words.
tidy_text <- tidy_text %>%
  separate(bigram, c("word1", "word2"), sep = " ")

tidy_text_filtered <- tidy_text %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word)

# Count each word pair within each chapter.
trigram_count <- tidy_text_filtered %>%
  count(chapter, word1, word2, sort = TRUE)

# Re-join the words and keep pairs that occur more than once in a chapter.
united <- trigram_count %>%
  unite(bigram, word1, word2, sep = " ") %>%
  filter(n > 1)

# tf-idf with the chapter number acting as the document id.
out <- united %>%
  bind_tf_idf(bigram, document = chapter, n)
head(out)
#  chapter          bigram  n        tf       idf    tf_idf
#1      21        sir john 12 0.2068966 0.9162907 0.1895774
#2      21    miss steeles 11 0.1896552 2.1202635 0.4021189
#3       9        sir john  9 0.6000000 0.9162907 0.5497744
#4      13        sir john  9 0.3750000 0.9162907 0.3436090
#5      23  lady middleton  9 0.4090909 1.0788097 0.4413312
#6      40 colonel brandon  9 0.4736842 0.6931472 0.3283329