如何在 R 中将列名相同的各列的值合并到一个新的数据框中?

How to combine values with the same column name into a new dataframe in R?

我有以下数据集

原始数据集:

ID    Col1    Col1    Col1    Col2    Col2    Col2
A     Dog                     House
B     Dog                             Car     Bike
C             Cat             House
D                     Mouse                   Bike

有没有办法创建一个新的数据框,把列名相同的各列的值合并到同一列中,如下所示:

预期数据集:

ID    Col1    Col2    
A     Dog     House
B     Dog     Car, Bike
C     Cat     House
D     Mouse   Bike

你可以这样做:

# Example input with deliberately repeated column names.
# check.names = FALSE stops data.frame() from renaming the duplicates
# (Col1, Col1.1, ...); stringsAsFactors = FALSE keeps the values character.
df <- data.frame(
  ID = c("A", "B", "C", "D"),
  Col1 = c("Dog", "Dog", NA, NA),
  Col1 = c(NA, NA, "Cat", NA),
  Col1 = c(NA, NA, NA, "Mouse"),
  Col2 = c("House", NA, "House", NA),
  Col2 = c(NA, "Car", NA, NA),
  Col2 = c(NA, "Bike", NA, "Bike"),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

library(dplyr)
library(tidyr)
library(purrr)

# Distinct column names other than the key column "ID"; each one names a
# group of same-named columns that will be merged into a single column.
vars_to_unite <- setdiff(names(df), "ID")

# tidyselect cannot address duplicated names, so make them unique first
# (Col1...2, Col1...3, ...). Column order is unchanged, so column positions
# in `renamed_df` still line up with names(df).
renamed_df <- as_tibble(df, .name_repair = "unique")

map_dfc(
  vars_to_unite,
  function(var) {
    # Select by the positions of the exact original name. A prefix match
    # such as starts_with("Col1") would also pull in a "Col10" group.
    unite(
      select(renamed_df, all_of(which(names(df) == var))),
      col = !!var, sep = ", ", na.rm = TRUE
    )
  }
) %>%
  # Re-attach the key column in first position, as in the expected result.
  mutate(ID = df$ID, .before = 1)

#> # A tibble: 4 × 3
#>   ID    Col1  Col2
#>   <chr> <chr> <chr>
#> 1 A     Dog   House
#> 2 B     Dog   Car, Bike
#> 3 C     Cat   House
#> 4 D     Mouse Bike

由 reprex 包 (v2.0.1) 创建于 2022-06-01

基础 R 解决方案:

# Input data: df => data.frame
# Built with structure() because the column names are deliberately
# duplicated (three "Col1" columns and three "Col2" columns).
df <- structure(list(
  ID = c("A", "B", "C", "D"),
  Col1 = c("Dog", "Dog", NA, NA),
  Col1 = c(NA, NA, "Cat", NA),
  Col1 = c(NA, NA, NA, "Mouse"),
  Col2 = c("House", NA, "House", NA),
  Col2 = c(NA, "Car", NA, NA),
  Col2 = c(NA, "Bike", NA, "Bike")
),
  class = c("data.frame"), row.names = c(NA, -4L)
)


# Merge every group of same-named columns into one character column.
#
# Args:
#   data: data.frame whose column names may be duplicated.
#   sep:  text placed between the values joined into one cell.
# Returns:
#   data.frame with one character column per distinct name in `data`, in
#   order of first appearance. Within each row the non-NA values of a group
#   are joined with `sep`; a row with no values yields "". Row names are
#   taken from `data`.
combine_same_named_cols <- function(data, sep = ", ") {
  col_names <- unique(names(data))
  n_rows <- nrow(data)
  all_cols <- as.list(data)

  combined <- lapply(col_names, function(nm) {
    # Convert column by column rather than using apply(): apply() goes
    # through as.matrix(), which pads numbers with blanks when a group
    # mixes numeric and character columns.
    group <- lapply(unname(all_cols[names(data) == nm]), as.character)
    vapply(
      seq_len(n_rows),
      function(i) {
        vals <- vapply(group, `[`, character(1), i)
        paste(vals[!is.na(vals)], collapse = sep)
      },
      character(1)
    )
  })
  names(combined) <- col_names

  # Build the result from a list of columns. Indexing a cbind()-ed matrix
  # with `[, j]` would drop a one-row or one-column result to a bare vector.
  data.frame(
    combined,
    check.names = FALSE,
    stringsAsFactors = FALSE,
    row.names = row.names(data)
  )
}

# Split-Apply-Combine: res => data.frame
res <- combine_same_named_cols(df)

# output Result: data.frame => stdout(console)
res