垂直合并2个变量R tidyverse

Merge 2 variables vertically R tidyverse

我用 2 种语言进行了一项调查,我想将 2 种语言的问题合并到一个变量中。

两种语言问卷的答案都存放在同一个 data.frame 中,日期(Date)是我的主键。不幸的是,我还是 R 的新手,没能找到优雅地合并这些列的方法。

示例情况

Date  Place_English  Plane_English  Place_French  Plane_French
One                                 azea          Three
Two   ertert         ertt

期望的结果

Date  Place   Plane
One   azea    Three
Two   ertert  ertt

这应该可以解决问题

# Collapse each English/French column pair into one column, keeping the first
# non-missing value per row. Only Date, Place and Plane are returned.
df %>%
  as_tibble() %>%
  # Convert blank strings to NA so coalesce() can skip them; only needed if
  # the missing fields are stored as blanks and not already NA.
  mutate(across(where(is.character), ~ na_if(.x, ""))) %>%
  transmute(
    Date,
    Place = coalesce(Place_English, Place_French),
    Plane = coalesce(Plane_English, Plane_French)
  )

只是为了跟进我的评论,假设空值为 NA:

library(tidyverse)

正在创建数据:

# Example data: each row has values in exactly one language, the other
# language's columns are NA. Columns are kept as character, not factor.
df <- data.frame(
  place_english = c(NA, "ertert"),
  plane_english = c(NA, "ertt"),
  place_french = c("azea", NA),
  plane_french = c("Three", NA),
  stringsAsFactors = FALSE
)

使用 coalesce 将 NA 替换为第一个非 NA 值:

# Append merged columns: for each language pair, take the first non-NA value
# per row. The original columns are kept.
df %>%
  mutate(
    Plane = coalesce(plane_english, plane_french),
    Place = coalesce(place_english, place_french)
  )
Source: local data frame [2 x 6]
Groups: <by row>

# A tibble: 2 x 6
  place_english plane_english place_french plane_french Plane Place 
  <chr>         <chr>         <chr>        <chr>        <chr> <chr> 
1 NA            NA            azea         Three        Three azea  
2 ertert        ertt          NA           NA           ertt  ertert

您也可以一次只处理一列来实现相同的效果,例如:

# Merge one language pair into a new column; the first non-NA value wins.
df[["Place"]] <- coalesce(df[["place_english"]], df[["place_french"]])

两种方法,都使用dplyr

情况 1:如果存在 NA(缺失值)

# Example data for the NA case: missing answers are written as NA in the text.
df <- read.table(header = TRUE, text = "Date   Place_English   Plane_English   Place_French    Plane_French
One NA NA   azea    Three
Two ertert  ertt    NA NA   ")

library(dplyr)

# For every *_English column, fall back to the *_French column of the same
# stem when the English value is NA. Results are named after the stem
# (suffix removed), and .keep = "unused" drops the source columns.
df %>%
  mutate(
    across(
      ends_with("_English"),
      # get() fetches the partner column by its constructed name.
      ~ coalesce(.x, get(gsub("_English", "_French", cur_column()))),
      .names = "{gsub('_English', '', .col)}"
    ),
    .keep = "unused"
  )
#>   Date  Place Plane
#> 1  One   azea Three
#> 2  Two ertert  ertt

情况 2:如果缺失值是用空字符串表示的

# Example data for the empty-string case: missing answers are written as ''.
df <- read.table(header = TRUE, text = "Date   Place_English   Plane_English   Place_French    Plane_French
One '' ''   azea    Three
Two ertert  ertt    ''  ''  ")
library(tidyverse)

# For every *_English column, concatenate it with the *_French column of the
# same stem. Results are named after the stem (suffix removed), and
# .keep = "unused" drops the source columns.
df %>%
  mutate(
    across(
      ends_with("_English"),
      # get() fetches the partner column by its constructed name.
      ~ paste0(.x, get(gsub("_English", "_French", cur_column()))),
      .names = "{gsub('_English', '', .col)}"
    ),
    .keep = "unused"
  )
#>   Date  Place Plane
#> 1  One   azea Three
#> 2  Two ertert  ertt

如果您不想丢失任何数据,请使用 paste

library(dplyr)
# Join each language pair into one string per row, so no value is discarded.
# paste() uses a single space as separator and renders NA as the text "NA".
df %>%
  mutate(
    Place = paste(Place_English, Place_French),
    Plane = paste(Plane_English, Plane_French)
  ) %>%
  # Drop the now-redundant per-language columns.
  select(-(Place_English:Plane_French))

如果您想去掉 NA,请使用 coalesce

# Merge each language pair, keeping the first non-NA value per row.
df %>%
  mutate(
    Place = coalesce(Place_English, Place_French),
    Plane = coalesce(Plane_English, Plane_French)
  ) %>%
  # Drop the now-redundant per-language columns.
  select(-(Place_English:Plane_French))

如果您想要合并超过 2 个列,请使用 tidyr 中的 unite。根据您的喜好设置na.rm

library(tidyr)
# Merge every column whose name contains "Place" (then "Plane") into a single
# column. Values are joined with a space; na.rm = TRUE leaves NA values out.
# The match is case-sensitive and is evaluated against the piped data.
df %>%
  unite(
    "Place",
    contains("Place", ignore.case = FALSE),
    remove = TRUE, sep = " ", na.rm = TRUE
  ) %>%
  unite(
    "Plane",
    contains("Plane", ignore.case = FALSE),
    remove = TRUE, sep = " ", na.rm = TRUE
  )
library(tidyr)
# Candidate columns for merging. To restrict to specified columns, take the
# names of a subset instead, e.g. colnames(df[, 15:25]) or
# colnames(df[, c(15, 18, 20, 25)]).
cols_to_paste <- colnames(df)

df %>%
  # All candidate columns with "Place" in the name.
  unite(
    "Place",
    all_of(grep("Place", cols_to_paste, value = TRUE)),
    remove = TRUE, sep = " ", na.rm = TRUE
  ) %>%
  # All candidate columns with "Plane" in the name.
  unite(
    "Plane",
    all_of(grep("Plane", cols_to_paste, value = TRUE)),
    remove = TRUE, sep = " ", na.rm = TRUE
  )

如果有 >2 列并且您不想全部输入,您可以使用与@coffeinjunky 相同的方法,但使用 across

# Example data: each row has values in exactly one language, the other
# language's columns are NA. Columns are kept as character, not factor.
df <- data.frame(
  place_english = c(NA, "ertert"),
  plane_english = c(NA, "ertt"),
  place_french = c("azea", NA),
  plane_french = c("Three", NA),
  stringsAsFactors = FALSE
)

library(dplyr, warn.conflicts = FALSE)

# Coalesce all columns sharing a prefix without listing them one by one:
# across() gathers the matching columns and do.call() passes them to
# coalesce() as separate arguments. Only the merged columns are returned.
df %>%
  transmute(
    place = do.call(what = coalesce, args = across(starts_with("place"))),
    plane = do.call(what = coalesce, args = across(starts_with("plane")))
  )
#>    place plane
#> 1   azea Three
#> 2 ertert  ertt

由 reprex 包 (v2.0.1) 于 2021-08-05 创建

这是使用 split.default 的基本 R 方法,它应该对任意数量的组动态工作。

# Every column except the first, which is treated as the key column.
tmp <- df[-1]

# Group the columns by the part of their name before the first underscore and
# reduce each group to one column with pmax(na.rm = TRUE), which returns the
# non-NA value when only one column in the group is filled.
# NOTE(review): if several columns in a group are non-NA, pmax() keeps the
# largest value rather than the first one; confirm that is acceptable.
# lapply() (not sapply()) keeps one list element per group even for a one-row
# data frame, so cbind() always receives proper columns.
result <- cbind(
  df[1],
  lapply(
    split.default(tmp, sub("_.*", "", names(tmp))),
    function(x) do.call(pmax, c(x, na.rm = TRUE))
  )
)

result

#  Date  Place Plane
#1  One   azea Three
#2  Two ertert  ertt