R 中的 rbind - 匹配列名
rbind in R - matching the column names
如何绑定我的两个 table,按列名匹配?第一个 table 看起来像这样:
result
size KUALA LUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1: big 0.000 116.861 138.366 0.000 136.207
2: medium 187.874 9656.649 0.000 0.000 223.415
3: small 435.344 245.598 333.317 272.342 348.692
4: tiny 0.000 0.000 0.000 0.000 0.000
第二个table只有两列:
SummaryRegion
experience region1
1: 156.523 PENANG
2: 272.342 SELANGOR DARUL EHSAN
3: 343.998 KUALA LUMPUR
4: 296.601 OTHERS
我想要的输出是这样的:
resultALL
size KUALA LUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1: big 0.000 116.861 138.366 0.000 136.207
2: medium 187.874 9656.649 0.000 0.000 223.415
3: small 435.344 245.598 333.317 272.342 348.692
4: tiny 0.000 0.000 0.000 0.000 0.000
5: TOTAL 343.998 296.601 156.523 272.342 NA
我首先尝试转置 results
,然后通过 region
合并,所以当我将其转置回去时,我会得到我想要的结果。不幸的是,我在行名和列名方面遇到了麻烦,但我确信有一个简单的解决方案吗?
你可以试试:
#Data
result <- structure(list(size = structure(1:4, .Label = c("big", "medium",
"small", "tiny"), class = "factor"), KUALALUMPUR = c(0, 187.874,
435.344, 0), OTHERS = c(116.861, 9656.649, 245.598, 0), PENANG = c(138.366,
0, 333.317, 0), SELANGOR = c(NA, NA, NA, NA), DARUL = c(NA, NA,
NA, NA), EHSAN = c(0, 0, 272.342, 0), TOTAL = c(136.207, 223.415,
348.692, 0)), class = "data.frame", row.names = c(NA, -4L))
#Compute totals
vec <- as.data.frame(t(c(size='TOTAL',colSums(result[,-c(1,8)],na.rm=T),TOTAL=NA)))
#Bind
DF <- rbind(result,vec)
size KUALALUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1 big 0 116.861 138.366 <NA> <NA> 0 136.207
2 medium 187.874 9656.649 0 <NA> <NA> 0 223.415
3 small 435.344 245.598 333.317 <NA> <NA> 272.342 348.692
4 tiny 0 0 0 <NA> <NA> 0 0
5 TOTAL 623.218 10019.108 471.683 0 0 272.342 <NA>
或者使用你的第二个数据框:
#Data
SummaryRegion <- structure(list(experience = c(156.523, 272.342, 343.998, 296.601
), region1 = structure(c(4L, 1L, 2L, 3L), .Label = c("EHSAN",
"KUALALUMPUR", "OTHERS", "PENANG"), class = "factor")), class = "data.frame", row.names = c("PENANG",
"EHSAN", "KUALALUMPUR", "OTHERS"))
library(plyr)
#Using provided df
rownames(SummaryRegion)<-SummaryRegion$region1
vec2 <- as.data.frame(t(SummaryRegion[,c(1),drop=F]))
DF2 <- rbind.fill(result,vec2)
size KUALALUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1 big 0.000 116.861 138.366 NA NA 0.000 136.207
2 medium 187.874 9656.649 0.000 NA NA 0.000 223.415
3 small 435.344 245.598 333.317 NA NA 272.342 348.692
4 tiny 0.000 0.000 0.000 NA NA 0.000 0.000
5 <NA> 343.998 296.601 156.523 NA NA 272.342 NA
For better readability,it is suggested to have headers without white spaces
。这是使用 base R 的一种可能的解决方案:
数据集:
dput(result)
structure(c("size", "KUALA LUMPUR", "OTHERS", "PENANG", "SELANGOR DARUL EHSAN",
"TOTAL", "big", " 0.000", " 116.861", "138.366", " 0.000",
"136.207", "medium", "187.874", "9656.649", " 0.000", " 0.000",
"223.415", "small", "435.344", " 245.598", "333.317", "272.342",
"348.692", "tiny", " 0.000", " 0.000", " 0.000", " 0.000",
" 0.000"), .Dim = 6:5, .Dimnames = list(NULL, NULL))
dput(SummaryRegion)
structure(list(experience = c(156.523, 272.342, 343.998, 296.601
), region1 = c("PENANG", "SELANGOR DARUL EHSAN", "KUALA LUMPUR",
"OTHERS")), row.names = c("1:00", "2:00", "3:00", "4:00"), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x7ffd9702b8e0>)
步骤:
result<-t(result)
result<-cbind(row.names(result),result)
row.names(result)<-NULL
result2<- merge(result,SummaryRegion, by.x="V1", by.y="region1",all=TRUE)
result2<-as.data.frame(t(result2))
names(result2)<-result2[1,]
result2<-result2[-1,]
row.names(result2)<-NULL
输出:
result2
KUALA LUMPUR OTHERS PENANG SELANGOR DARUL EHSAN size TOTAL
1 0.000 116.861 138.366 0.000 big 136.207
2 187.874 9656.649 0.000 0.000 medium 223.415
3 435.344 245.598 333.317 272.342 small 348.692
4 0.000 0.000 0.000 0.000 tiny 0.000
5 343.998 296.601 156.523 272.342 <NA> <NA>
这是一个dplyr
选项,使用来自@Duck的数据。
SummaryRegion_wide <- SummaryRegion %>%
pivot_wider(names_from = region1, values_from = experience) %>%
mutate(size = 'Total')
result %>%
bind_rows(SummaryRegion_wide)
# ----
size KUALALUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1 big 0.000 116.861 138.366 NA NA 0.000 136.207
2 medium 187.874 9656.649 0.000 NA NA 0.000 223.415
3 small 435.344 245.598 333.317 NA NA 272.342 348.692
4 tiny 0.000 0.000 0.000 NA NA 0.000 0.000
5 TOTAL 343.998 296.601 156.523 NA NA 272.342 NA
如何绑定我的两个 table,按列名匹配?第一个 table 看起来像这样:
result
size KUALA LUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1: big 0.000 116.861 138.366 0.000 136.207
2: medium 187.874 9656.649 0.000 0.000 223.415
3: small 435.344 245.598 333.317 272.342 348.692
4: tiny 0.000 0.000 0.000 0.000 0.000
第二个table只有两列:
SummaryRegion
experience region1
1: 156.523 PENANG
2: 272.342 SELANGOR DARUL EHSAN
3: 343.998 KUALA LUMPUR
4: 296.601 OTHERS
我想要的输出是这样的:
resultALL
size KUALA LUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1: big 0.000 116.861 138.366 0.000 136.207
2: medium 187.874 9656.649 0.000 0.000 223.415
3: small 435.344 245.598 333.317 272.342 348.692
4: tiny 0.000 0.000 0.000 0.000 0.000
5: TOTAL 343.998 296.601 156.523 272.342 NA
我首先尝试转置 results
,然后通过 region
合并,所以当我将其转置回去时,我会得到我想要的结果。不幸的是,我在行名和列名方面遇到了麻烦,但我确信有一个简单的解决方案吗?
你可以试试:
#Data
result <- structure(list(size = structure(1:4, .Label = c("big", "medium",
"small", "tiny"), class = "factor"), KUALALUMPUR = c(0, 187.874,
435.344, 0), OTHERS = c(116.861, 9656.649, 245.598, 0), PENANG = c(138.366,
0, 333.317, 0), SELANGOR = c(NA, NA, NA, NA), DARUL = c(NA, NA,
NA, NA), EHSAN = c(0, 0, 272.342, 0), TOTAL = c(136.207, 223.415,
348.692, 0)), class = "data.frame", row.names = c(NA, -4L))
#Compute totals
vec <- as.data.frame(t(c(size='TOTAL',colSums(result[,-c(1,8)],na.rm=T),TOTAL=NA)))
#Bind
DF <- rbind(result,vec)
size KUALALUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1 big 0 116.861 138.366 <NA> <NA> 0 136.207
2 medium 187.874 9656.649 0 <NA> <NA> 0 223.415
3 small 435.344 245.598 333.317 <NA> <NA> 272.342 348.692
4 tiny 0 0 0 <NA> <NA> 0 0
5 TOTAL 623.218 10019.108 471.683 0 0 272.342 <NA>
或者使用你的第二个数据框:
#Data
SummaryRegion <- structure(list(experience = c(156.523, 272.342, 343.998, 296.601
), region1 = structure(c(4L, 1L, 2L, 3L), .Label = c("EHSAN",
"KUALALUMPUR", "OTHERS", "PENANG"), class = "factor")), class = "data.frame", row.names = c("PENANG",
"EHSAN", "KUALALUMPUR", "OTHERS"))
library(plyr)
#Using provided df
rownames(SummaryRegion)<-SummaryRegion$region1
vec2 <- as.data.frame(t(SummaryRegion[,c(1),drop=F]))
DF2 <- rbind.fill(result,vec2)
size KUALALUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1 big 0.000 116.861 138.366 NA NA 0.000 136.207
2 medium 187.874 9656.649 0.000 NA NA 0.000 223.415
3 small 435.344 245.598 333.317 NA NA 272.342 348.692
4 tiny 0.000 0.000 0.000 NA NA 0.000 0.000
5 <NA> 343.998 296.601 156.523 NA NA 272.342 NA
For better readability,it is suggested to have headers without white spaces
。这是使用 base R 的一种可能的解决方案:
数据集:
dput(result)
structure(c("size", "KUALA LUMPUR", "OTHERS", "PENANG", "SELANGOR DARUL EHSAN",
"TOTAL", "big", " 0.000", " 116.861", "138.366", " 0.000",
"136.207", "medium", "187.874", "9656.649", " 0.000", " 0.000",
"223.415", "small", "435.344", " 245.598", "333.317", "272.342",
"348.692", "tiny", " 0.000", " 0.000", " 0.000", " 0.000",
" 0.000"), .Dim = 6:5, .Dimnames = list(NULL, NULL))
dput(SummaryRegion)
structure(list(experience = c(156.523, 272.342, 343.998, 296.601
), region1 = c("PENANG", "SELANGOR DARUL EHSAN", "KUALA LUMPUR",
"OTHERS")), row.names = c("1:00", "2:00", "3:00", "4:00"), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x7ffd9702b8e0>)
步骤:
result<-t(result)
result<-cbind(row.names(result),result)
row.names(result)<-NULL
result2<- merge(result,SummaryRegion, by.x="V1", by.y="region1",all=TRUE)
result2<-as.data.frame(t(result2))
names(result2)<-result2[1,]
result2<-result2[-1,]
row.names(result2)<-NULL
输出:
result2
KUALA LUMPUR OTHERS PENANG SELANGOR DARUL EHSAN size TOTAL
1 0.000 116.861 138.366 0.000 big 136.207
2 187.874 9656.649 0.000 0.000 medium 223.415
3 435.344 245.598 333.317 272.342 small 348.692
4 0.000 0.000 0.000 0.000 tiny 0.000
5 343.998 296.601 156.523 272.342 <NA> <NA>
这是一个dplyr
选项,使用来自@Duck的数据。
SummaryRegion_wide <- SummaryRegion %>%
pivot_wider(names_from = region1, values_from = experience) %>%
mutate(size = 'Total')
result %>%
bind_rows(SummaryRegion_wide)
# ----
size KUALALUMPUR OTHERS PENANG SELANGOR DARUL EHSAN TOTAL
1 big 0.000 116.861 138.366 NA NA 0.000 136.207
2 medium 187.874 9656.649 0.000 NA NA 0.000 223.415
3 small 435.344 245.598 333.317 NA NA 272.342 348.692
4 tiny 0.000 0.000 0.000 NA NA 0.000 0.000
5 TOTAL 343.998 296.601 156.523 NA NA 272.342 NA