R 中的 rbind - 匹配列名

rbind in R - matching the column names

如何绑定我的两个 table,按列名匹配?第一个 table 看起来像这样:

result
     size KUALA LUMPUR   OTHERS  PENANG SELANGOR DARUL EHSAN   TOTAL
1:    big        0.000  116.861 138.366                0.000 136.207
2: medium      187.874 9656.649   0.000                0.000 223.415
3:  small      435.344  245.598 333.317              272.342 348.692
4:   tiny        0.000    0.000   0.000                0.000   0.000

第二个table只有两列:

SummaryRegion
     experience              region1
1:      156.523               PENANG
2:      272.342 SELANGOR DARUL EHSAN
3:      343.998         KUALA LUMPUR
4:      296.601               OTHERS

我想要的输出是这样的:

resultALL
     size KUALA LUMPUR   OTHERS  PENANG SELANGOR DARUL EHSAN   TOTAL
1:    big        0.000  116.861 138.366                0.000 136.207
2: medium      187.874 9656.649   0.000                0.000 223.415
3:  small      435.344  245.598 333.317              272.342 348.692
4:   tiny        0.000    0.000   0.000                0.000   0.000
5:  TOTAL      343.998  296.601 156.523              272.342      NA

我首先尝试转置 result,然后按 region 合并,这样再转置回去时就能得到我想要的结果。不幸的是,我在行名和列名方面遇到了麻烦,不过我想应该有一个简单的解决方案吧?

你可以试试:

# Data: the `result` table as a plain data.frame. The three-word header
# "SELANGOR DARUL EHSAN" appears to have been split into three columns here:
# SELANGOR and DARUL hold only NA, and EHSAN carries the values.
result <- structure(list(size = structure(1:4, .Label = c("big", "medium", 
"small", "tiny"), class = "factor"), KUALALUMPUR = c(0, 187.874, 
435.344, 0), OTHERS = c(116.861, 9656.649, 245.598, 0), PENANG = c(138.366, 
0, 333.317, 0), SELANGOR = c(NA, NA, NA, NA), DARUL = c(NA, NA, 
NA, NA), EHSAN = c(0, 0, 272.342, 0), TOTAL = c(136.207, 223.415, 
348.692, 0)), class = "data.frame", row.names = c(NA, -4L))

# Compute totals: sum every region column (everything except `size`, column 1,
# and `TOTAL`, column 8), ignoring NAs.
region_totals <- colSums(result[, -c(1, 8)], na.rm = TRUE)
# Build the totals row as a data.frame so the sums stay numeric. Packing the
# "TOTAL" label and the sums into a single c() vector would coerce every value
# to character, and rbind() would then turn the numeric columns of `result`
# into character columns as well.
vec <- data.frame(
  size = "TOTAL",
  t(region_totals),
  TOTAL = NA_real_,
  check.names = FALSE
)
# Bind: columns are matched by name; the `size` factor gains a "TOTAL" level.
DF <- rbind(result, vec)

    size KUALALUMPUR    OTHERS  PENANG SELANGOR DARUL   EHSAN   TOTAL
1    big           0   116.861 138.366     <NA>  <NA>       0 136.207
2 medium     187.874  9656.649       0     <NA>  <NA>       0 223.415
3  small     435.344   245.598 333.317     <NA>  <NA> 272.342 348.692
4   tiny           0         0       0     <NA>  <NA>       0       0
5  TOTAL     623.218 10019.108 471.683        0     0 272.342    <NA>

或者使用你的第二个数据框:

# Data: the second table, one `experience` value per region. Row names repeat
# the region labels; factor levels are listed in alphabetical order.
SummaryRegion <- data.frame(
  experience = c(156.523, 272.342, 343.998, 296.601),
  region1 = factor(
    c("PENANG", "EHSAN", "KUALALUMPUR", "OTHERS"),
    levels = c("EHSAN", "KUALALUMPUR", "OTHERS", "PENANG")
  ),
  row.names = c("PENANG", "EHSAN", "KUALALUMPUR", "OTHERS")
)

library(plyr)
# Using provided df: turn SummaryRegion into a one-row data frame whose column
# names are the region names, so the row lines up with the columns of `result`.
rownames(SummaryRegion) <- SummaryRegion$region1
# Select `experience` by name and keep the data.frame shape (drop = FALSE) so
# that t() yields a one-row matrix with the region names as column names.
vec2 <- as.data.frame(t(SummaryRegion[, "experience", drop = FALSE]))
# rbind.fill() matches columns by name and fills columns that are missing from
# one input with NA; `vec2` has no `size` column, so the new row's `size` is NA.
DF2 <- rbind.fill(result, vec2)


    size KUALALUMPUR   OTHERS  PENANG SELANGOR DARUL   EHSAN   TOTAL
1    big       0.000  116.861 138.366       NA    NA   0.000 136.207
2 medium     187.874 9656.649   0.000       NA    NA   0.000 223.415
3  small     435.344  245.598 333.317       NA    NA 272.342 348.692
4   tiny       0.000    0.000   0.000       NA    NA   0.000   0.000
5   <NA>     343.998  296.601 156.523       NA    NA 272.342      NA

为了提高可读性,建议使用不含空格的列名。下面是使用 base R 的一种可能的解决方案:

数据集:

dput(result)
structure(c("size", "KUALA LUMPUR", "OTHERS", "PENANG", "SELANGOR DARUL EHSAN", 
"TOTAL", "big", "  0.000", " 116.861", "138.366", "  0.000", 
"136.207", "medium", "187.874", "9656.649", "  0.000", "  0.000", 
"223.415", "small", "435.344", " 245.598", "333.317", "272.342", 
"348.692", "tiny", "  0.000", "   0.000", "  0.000", "  0.000", 
"  0.000"), .Dim = 6:5, .Dimnames = list(NULL, NULL))

dput(SummaryRegion)
structure(list(experience = c(156.523, 272.342, 343.998, 296.601
), region1 = c("PENANG", "SELANGOR DARUL EHSAN", "KUALA LUMPUR", 
"OTHERS")), row.names = c("1:00", "2:00", "3:00", "4:00"), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x7ffd9702b8e0>)

步骤:

# Transpose `result` so its column headers become values that can serve as a
# merge key, and store those headers in the first column.
result <- t(result)
result <- cbind(row.names(result), result)
row.names(result) <- NULL

# Full outer join on the header column (referred to as "V1" once merge() has
# coerced the matrix to a data frame). all = TRUE keeps headers that have no
# matching region in SummaryRegion.
result2 <- merge(result, SummaryRegion, by.x = "V1", by.y = "region1", all = TRUE)
# Transpose back so the headers run across the first row again.
result2 <- as.data.frame(t(result2))

# Promote the first row (the headers) to column names. The row is flattened to
# a plain character vector first: assigning the one-row data frame directly
# relies on implicit list-to-character coercion, which yields integer codes
# instead of labels when the columns are factors.
names(result2) <- as.character(unlist(result2[1, ], use.names = FALSE))
result2 <- result2[-1, ]
row.names(result2) <- NULL

输出:

result2

  KUALA LUMPUR   OTHERS  PENANG SELANGOR DARUL EHSAN   size   TOTAL
1        0.000  116.861 138.366                0.000    big 136.207
2      187.874 9656.649   0.000                0.000 medium 223.415
3      435.344  245.598 333.317              272.342  small 348.692
4        0.000    0.000   0.000                0.000   tiny   0.000
5      343.998  296.601 156.523              272.342   <NA>    <NA>

这是一个dplyr选项,使用来自@Duck的数据。

# `%>%`, mutate() and bind_rows() come from dplyr; pivot_wider() from tidyr.
library(dplyr)
library(tidyr)

# Reshape SummaryRegion to a single row with one column per region, then add
# the `size` label for that row. The label is "TOTAL" to match the output
# shown below and the row requested in the question.
SummaryRegion_wide <- SummaryRegion %>%
  pivot_wider(names_from = region1, values_from = experience) %>%
  mutate(size = "TOTAL")

# bind_rows() matches columns by name; columns absent from the totals row are
# filled with NA.
result %>%
  bind_rows(SummaryRegion_wide)


# ----
    size KUALALUMPUR   OTHERS  PENANG SELANGOR DARUL   EHSAN   TOTAL
1    big       0.000  116.861 138.366       NA    NA   0.000 136.207
2 medium     187.874 9656.649   0.000       NA    NA   0.000 223.415
3  small     435.344  245.598 333.317       NA    NA 272.342 348.692
4   tiny       0.000    0.000   0.000       NA    NA   0.000   0.000
5  TOTAL     343.998  296.601 156.523       NA    NA 272.342      NA