合并 tibble 中的行

Merge rows in tibble

我想在一个表格中列出我的包中的所有函数。

到目前为止,我从包帮助文档中提取了所有函数和标题

# Build a two-column tibble (Function, Title) from a package's help index.
library(magrittr)

# Second element of the help info: one string per printed index line.
package_info <- library(help = magrittr)$info[[2]]

# Split every line at the first run of whitespace into name + title.
# The backslash has to be doubled inside an R string literal: "\s" is
# rejected by the parser as an unrecognized escape.
package_info_tbl <- package_info %>%
  stringr::str_split(pattern = "\\s+", n = 2, simplify = TRUE) %>%
  tibble::as_tibble(.name_repair = "minimal")
colnames(package_info_tbl) <- c("Function", "Title")

package_info_tbl
#> # A tibble: 13 x 2
#>    Function     Title                                          
#>    <chr>        <chr>                                          
#>  1 "%$%"        magrittr exposition pipe-operator              
#>  2 "%<>%"       magrittr compound assignment pipe-operator     
#>  3 "%>%"        magrittr forward-pipe operator                 
#>  4 "%T>%"       magrittr tee operator                          
#>  5 "[[.fseq"    Extract function(s) from a functional sequence.
#>  6 "debug_fseq" Debugging function for functional sequences.   
#>  7 "debug_pipe" Debugging function for magrittr pipelines.     
#>  8 "extract"    Aliases                                        
#>  9 "freduce"    Apply a list of functions sequentially         
#> 10 "functions"  Extract the function list from a functional    
#> 11 ""           sequence.                                      
#> 12 "magrittr"   magrittr - Ceci n'est pas un pipe              
#> 13 "print.fseq" Print method for functional sequence.

由 reprex 包 (v0.3.0) 于 2020-03-29 创建

我发现当标题很长时,某些条目会被拆分成 2 行或更多行。如何把这些行合并起来?

您可以使用 summarise 来聚合这些行。在此之前,需要先标记哪些行属于同一个函数。一个简单的 locf(用前一个非缺失值向后填充)就足够了:

# Merge wrapped help-index lines: blank Function names are filled from the
# previous row, then Title fragments are pasted together per function.
library("zoo")
library(tidyr)
library(magrittr)
library(dplyr)

package_info <- library(help = magrittr)$info[[2]]
package_info_tbl <- package_info %>%
  # "\\s+" - the backslash must be escaped in an R string literal
  stringr::str_split(pattern = "\\s+", n = 2, simplify = TRUE) %>%
  # set colnames on the character matrix before converting
  `colnames<-`(c("Function", "Title")) %>%
  tibble::as_tibble() %>%
  dplyr::mutate(
    # explicit NAs for continuation lines
    Function = if_else(Function == "", NA_character_, Function),
    # replace NAs with the prior value; na.rm = FALSE keeps the vector
    # length unchanged even if the very first entry is NA
    Function = zoo::na.locf(Function, na.rm = FALSE)
  ) %>%
  # paste together the strings for each function
  group_by(Function) %>%
  summarise(Title = paste(Title, collapse = " "))

如果为空,请使用前一行的值填充 Function 列。如果 Function 相同,则折叠 Title

# Carry the last non-empty Function name forward over blank entries.
# `accumulate = TRUE` returns every intermediate value, i.e. one name per row.
# (The statement must not end in a pipe: a trailing %>% would feed the result
# into the next assignment.)
package_info_tbl$Function <- Reduce(
  function(x, y) if (y == "") x else y,
  package_info_tbl$Function,
  accumulate = TRUE
)

# Collapse the Title fragments that now share a Function name.
package_info_tbl <- package_info_tbl %>%
  group_by(Function) %>%
  summarise(Title = paste(Title, collapse = " "))

或者,合并到您的 dplyr 链中

# Single pipeline: split the index lines, name the columns, fill blank
# Function names from the row above, then merge the Title fragments.
package_info_tbl <- package_info %>%
  # "\\s+" - the backslash must be escaped in an R string literal
  stringr::str_split(pattern = "\\s+", n = 2, simplify = TRUE) %>%
  tibble::as_tibble(.name_repair = "minimal") %>%
  setNames(., c("Function", "Title")) %>%
  mutate(
    Function = Reduce(
      function(x, y) if (y == "") x else y,
      Function,
      accumulate = TRUE
    )
  ) %>%
  group_by(Function) %>%
  summarise(Title = paste(Title, collapse = " ")) %>%
  ungroup()

输出

# Auto-print the tibble to inspect the result.
package_info_tbl

# # A tibble: 12 x 2
#    Function   Title                                                
#    <chr>      <chr>                                                
#  1 %$%        magrittr exposition pipe-operator                    
#  2 %<>%       magrittr compound assignment pipe-operator           
#  3 %>%        magrittr forward-pipe operator                       
#  4 %T>%       magrittr tee operator                                
#  5 [[.fseq    Extract function(s) from a functional sequence.      
#  6 debug_fseq Debugging function for functional sequences.         
#  7 debug_pipe Debugging function for magrittr pipelines.           
#  8 extract    Aliases                                              
#  9 freduce    Apply a list of functions sequentially               
# 10 functions  Extract the function list from a functional sequence.
# 11 magrittr   magrittr - Ceci n'est pas un pipe                    
# 12 print.fseq Print method for functional sequence.  

我们可以先用 NA 替换空字符串,再使用 fill 将 NA 替换为 Function 列中的前一个值,然后按 Function 分组(group_by),并为每个 Function 生成一个拼接后的字符串。

library(dplyr)

# Turn blank Function names into NA, fill them downwards from the previous
# row, then paste the Title fragments of each function together.
package_info_tbl %>%
  # na_if() is a vector function; dplyr >= 1.1 rejects a whole data frame
  # here, so it is applied to the Function column only.
  mutate(Function = na_if(Function, "")) %>%
  tidyr::fill(Function) %>%
  group_by(Function) %>%
  summarise(Title = paste(Title, collapse = " "))


# A tibble: 12 x 2
#   Function   Title                                                
#   <chr>      <chr>                                                
# 1 [[.fseq    Extract function(s) from a functional sequence.      
# 2 %<>%       magrittr compound assignment pipe-operator           
# 3 %>%        magrittr forward-pipe operator                       
# 4 %$%        magrittr exposition pipe-operator                    
# 5 %T>%       magrittr tee operator                                
# 6 debug_fseq Debugging function for functional sequences.         
# 7 debug_pipe Debugging function for magrittr pipelines.           
# 8 extract    Aliases                                              
# 9 freduce    Apply a list of functions sequentially               
#10 functions  Extract the function list from a functional sequence.
#11 magrittr   magrittr - Ceci n'est pas un pipe                    
#12 print.fseq Print method for functional sequence.               

我们也可以使用 stringr 的 str_c 来拼接字符串:

library(dplyr)
library(tidyr)
library(stringr)

# Same approach as the paste() version, using stringr::str_c() to join the
# Title fragments of each function.
package_info_tbl %>%
  # na_if() is a vector function; dplyr >= 1.1 rejects a whole data frame
  # here, so it is applied to the Function column only.
  mutate(Function = na_if(Function, "")) %>%
  fill(Function) %>%
  group_by(Function) %>%
  summarise(Title = str_c(Title, collapse = " "))