rbind 具有不同列名的数据帧
rbind dataframes with a different column name
我有 12 个数据框,每个包含 6 列:5 个名称相同,1 个不同。然后当我调用 rbind() 时
我得到:
Error in match.names(clabs, names(xi)) :
names do not match previous names
不同的列是:“goal1Completions”。有 12 个 goalsCompletions...它们是:“goal1Completions”、“goal2Completions”、“goal3Completions”...等等。
我能想到的最好方法是:将每个数据框中的每一列重命名为“GoalsCompletions”,然后使用“rbind()”。
有没有更简单的方法?
在 Google 上搜索时,我找到了这个包:“gtools”。它有一个函数叫做“smartbind”。但是,在使用 smartbind() 之后,当我想用“View()”查看数据框时,我的 R 会话崩溃了...
我的数据(第一个数据框的例子):
date source medium campaign goal1Completions ad.cost Goal
1 2014-10-01 (direct) (none) (not set) 0 0.0000 Vida
2 2014-10-01 Master email CAFRE 0 0.0000 Vida
3 2014-10-01 apeseg referral (not set) 0 0.0000 Vida
我会重命名列。如果列的顺序相同,这对于 names()
来说非常容易。
df1 <- data.frame(one=1:10,two=11:20,three=21:30)
df2 <- data.frame(four=31:40,five=41:50,six=51:60)
names(df2)<-names(df1)
rbind(df1,df2)
或
# One-expression alternative: setNames() (from the default-attached stats
# package) returns a renamed copy of df2, so the rename and the bind happen
# together and df2 itself is left untouched.
df1 <- data.frame(one = 1:10, two = 11:20, three = 21:30)
df2 <- data.frame(four = 31:40, five = 41:50, six = 51:60)
rbind(df1, setNames(df2, names(df1)))
结果:
one two three
1 1 11 21
2 2 12 22
3 3 13 23
4 4 14 24
5 5 15 25
6 6 16 26
7 7 17 27
8 8 18 28
9 9 19 29
10 10 20 30
11 31 41 51
12 32 42 52
13 33 43 53
14 34 44 54
15 35 45 55
16 36 46 56
17 37 47 57
18 38 48 58
19 39 49 59
20 40 50 60
我最喜欢的mapply
用法:
示例数据
a <- data.frame(a=runif(5), b=runif(5))
> a
a b
1 0.8403348 0.1579255
2 0.4759767 0.8182902
3 0.8091875 0.1080651
4 0.9846333 0.7035959
5 0.2153991 0.8744136
和b
b <- data.frame(c=runif(5), d=runif(5))
> b
c d
1 0.7604137 0.9753853
2 0.7553924 0.1210260
3 0.7315970 0.6196829
4 0.5619395 0.1120331
5 0.5711995 0.7252631
解决方案
使用mapply
:
> mapply(c, a,b) #or as.data.frame(mapply(c, a,b)) for a data.frame
a b
[1,] 0.8403348 0.1579255
[2,] 0.4759767 0.8182902
[3,] 0.8091875 0.1080651
[4,] 0.9846333 0.7035959
[5,] 0.2153991 0.8744136
[6,] 0.7604137 0.9753853
[7,] 0.7553924 0.1210260
[8,] 0.7315970 0.6196829
[9,] 0.5619395 0.1120331
[10,] 0.5711995 0.7252631
并且基于@Marat 的以下评论:
您也可以 data.frame(mapply(c, a, b, SIMPLIFY=FALSE))
或者 data.frame(Map(c,a,b))
来避免双重 data.frame-矩阵转换
您可以使用 rbindlist
,它采用不同的列名。使用@LyzandeR 的数据
library(data.table) #data.table_1.9.5
rbindlist(list(a,b))
# a b
# 1: 0.8403348 0.1579255
# 2: 0.4759767 0.8182902
# 3: 0.8091875 0.1080651
# 4: 0.9846333 0.7035959
# 5: 0.2153991 0.8744136
# 6: 0.7604137 0.9753853
# 7: 0.7553924 0.1210260
# 8: 0.7315970 0.6196829
# 9: 0.5619395 0.1120331
#10: 0.5711995 0.7252631
更新
基于12个数据集的对象名称(即'Goal1_Costo'、'Goal2_Costo'、...、'Goal12_Costo'),
nm1 <- paste(paste0('Goal', 1:12), 'Costo', sep="_")
#or using `sprintf`
#nm1 <- sprintf('%s%d_%s', 'Goal', 1:12, 'Costo')
rbindlist(mget(nm1))
这是一个可能的 tidyverse 解决方案。我根据您对数据框的描述创建了 3 个示例数据框。
df1 <- read.table(text ="date,source,medium,campaign,goal1Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),1,0.0000,Vida
2014-10-01,Master,email,CAFRE,2,0.0000,Vida
2014-10-01,apeseg,referral,(not set),3,0.0000,vida",sep = ",",header=TRUE)
df2 <- read.table(text ="date,source,medium,campaign,goal2Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),4,0.0000,Vida
2014-10-01,Master,email,CAFRE,5,0.0000,Vida
2014-10-01,apeseg,referral,(not set),6,0.0000,vida",sep = ",",header=TRUE)
df3 <- read.table(text ="date,source,medium,campaign,goal3Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),7,0.0000,Vida
2014-10-01,Master,email,CAFRE,8,0.0000,Vida
2014-10-01,apeseg,referral,(not set),9,0.0000,vida",sep = ",",header=TRUE)
> df1
date source medium campaign goal1Completions ad.cost Goal
1 2014-10-01 (direct) (none) (notset) 1 0 Vida
2 2014-10-01 Master email CAFRE 2 0 Vida
3 2014-10-01 apeseg referral (not set) 3 0 vida
> df2
date source medium campaign goal2Completions ad.cost Goal
1 2014-10-01 (direct) (none) (notset) 4 0 Vida
2 2014-10-01 Master email CAFRE 5 0 Vida
3 2014-10-01 apeseg referral (not set) 6 0 vida
> df3
date source medium campaign goal3Completions ad.cost Goal
1 2014-10-01 (direct) (none) (notset) 7 0 Vida
2 2014-10-01 Master email CAFRE 8 0 Vida
3 2014-10-01 apeseg referral (not set) 9 0 vida
library(dplyr)
library(tidyselect)
library(purrr)
# Stack the frames (bind_rows() fills columns a frame lacks with NA), then
# collapse the goal<N>Completions columns into a single column by taking,
# per row, the first non-missing value, and finally drop the per-goal columns.
# select() with a tidyselect helper replaces the superseded select_at()/vars().
bind_rows(df1, df2, df3) %>%
  mutate(
    goalCompletions = reduce(
      select(., matches("goal[[:digit:]]+Completions")),
      coalesce
    )
  ) %>%
  select(-matches("goal[[:digit:]]+Completions"))
date source medium campaign ad.cost Goal goalCompletions
1 2014-10-01 (direct) (none) (notset) 0 Vida 1
2 2014-10-01 Master email CAFRE 0 Vida 2
3 2014-10-01 apeseg referral (not set) 0 vida 3
4 2014-10-01 (direct) (none) (notset) 0 Vida 4
5 2014-10-01 Master email CAFRE 0 Vida 5
6 2014-10-01 apeseg referral (not set) 0 vida 6
7 2014-10-01 (direct) (none) (notset) 0 Vida 7
8 2014-10-01 Master email CAFRE 0 Vida 8
9 2014-10-01 apeseg referral (not set) 0 vida 9
另一种基础 R 方法,适用于一个由 data.frame 组成的列表,
且其中各数据框的列名不尽相同:
# Create a list of data frames
df_list <- list()
df_list[[1]] <- data.frame(x = 1, y = paste0("y1", 1:3))
df_list[[2]] <- data.frame(x = 2, y = paste0("y2", 1:4))
df_list[[3]] <- data.frame(x = 3, y = paste0("y3", 1:5), z = "z3")
df_list
#> [[1]]
#> x y
#> 1 1 y11
#> 2 1 y12
#> 3 1 y13
#>
#> [[2]]
#> x y
#> 1 2 y21
#> 2 2 y22
#> 3 2 y23
#> 4 2 y24
#>
#> [[3]]
#> x y z
#> 1 3 y31 z3
#> 2 3 y32 z3
#> 3 3 y33 z3
#> 4 3 y34 z3
#> 5 3 y35 z3
# This works when the column names are the same
do.call(rbind, df_list[1:2])
#> x y
#> 1 1 y11
#> 2 1 y12
#> 3 1 y13
#> 4 2 y21
#> 5 2 y22
#> 6 2 y23
#> 7 2 y24
# but fails when the column names differ
do.call(rbind, df_list)
#> Error in rbind(deparse.level, ...): numbers of columns of arguments do not match
# This can fill the unmatched columns with NA's without
# depending on other packages:
# Compute the union of all column names once (it does not depend on the
# element being processed), pad each data frame with NA columns for the
# names it lacks, then stack the padded frames pairwise with rbind().
all_cols <- unique(unlist(lapply(df_list, colnames)))
Reduce(rbind, lapply(df_list, function(x) {
  x[, setdiff(all_cols, names(x))] <- NA
  x
}))
#> x y z
#> 1 1 y11 <NA>
#> 2 1 y12 <NA>
#> 3 1 y13 <NA>
#> 4 2 y21 <NA>
#> 5 2 y22 <NA>
#> 6 2 y23 <NA>
#> 7 2 y24 <NA>
#> 8 3 y31 z3
#> 9 3 y32 z3
#> 10 3 y33 z3
#> 11 3 y34 z3
#> 12 3 y35 z3
我有 12 个数据框,每个包含 6 列:5 个名称相同,1 个不同。然后当我调用 rbind() 时
我得到:
Error in match.names(clabs, names(xi)) :
names do not match previous names
不同的列是:“goal1Completions”。有 12 个 goalsCompletions...它们是:“goal1Completions”、“goal2Completions”、“goal3Completions”...等等。
我能想到的最好方法是:将每个数据框中的每一列重命名为“GoalsCompletions”,然后使用“rbind()”。
有没有更简单的方法?
在 Google 上搜索时,我找到了这个包:“gtools”。它有一个函数叫做“smartbind”。但是,在使用 smartbind() 之后,当我想用“View()”查看数据框时,我的 R 会话崩溃了...
我的数据(第一个数据框的例子):
date source medium campaign goal1Completions ad.cost Goal
1 2014-10-01 (direct) (none) (not set) 0 0.0000 Vida
2 2014-10-01 Master email CAFRE 0 0.0000 Vida
3 2014-10-01 apeseg referral (not set) 0 0.0000 Vida
我会重命名列。如果列的顺序相同,这对于 names()
来说非常容易。
df1 <- data.frame(one=1:10,two=11:20,three=21:30)
df2 <- data.frame(four=31:40,five=41:50,six=51:60)
names(df2)<-names(df1)
rbind(df1,df2)
或
# One-expression alternative: setNames() (from the default-attached stats
# package) returns a renamed copy of df2, so the rename and the bind happen
# together and df2 itself is left untouched.
df1 <- data.frame(one = 1:10, two = 11:20, three = 21:30)
df2 <- data.frame(four = 31:40, five = 41:50, six = 51:60)
rbind(df1, setNames(df2, names(df1)))
结果:
one two three
1 1 11 21
2 2 12 22
3 3 13 23
4 4 14 24
5 5 15 25
6 6 16 26
7 7 17 27
8 8 18 28
9 9 19 29
10 10 20 30
11 31 41 51
12 32 42 52
13 33 43 53
14 34 44 54
15 35 45 55
16 36 46 56
17 37 47 57
18 38 48 58
19 39 49 59
20 40 50 60
我最喜欢的mapply
用法:
示例数据
a <- data.frame(a=runif(5), b=runif(5))
> a
a b
1 0.8403348 0.1579255
2 0.4759767 0.8182902
3 0.8091875 0.1080651
4 0.9846333 0.7035959
5 0.2153991 0.8744136
和b
b <- data.frame(c=runif(5), d=runif(5))
> b
c d
1 0.7604137 0.9753853
2 0.7553924 0.1210260
3 0.7315970 0.6196829
4 0.5619395 0.1120331
5 0.5711995 0.7252631
解决方案
使用mapply
:
> mapply(c, a,b) #or as.data.frame(mapply(c, a,b)) for a data.frame
a b
[1,] 0.8403348 0.1579255
[2,] 0.4759767 0.8182902
[3,] 0.8091875 0.1080651
[4,] 0.9846333 0.7035959
[5,] 0.2153991 0.8744136
[6,] 0.7604137 0.9753853
[7,] 0.7553924 0.1210260
[8,] 0.7315970 0.6196829
[9,] 0.5619395 0.1120331
[10,] 0.5711995 0.7252631
并且基于@Marat 的以下评论:
您也可以 data.frame(mapply(c, a, b, SIMPLIFY=FALSE))
或者 data.frame(Map(c,a,b))
来避免双重 data.frame-矩阵转换
您可以使用 rbindlist
,它采用不同的列名。使用@LyzandeR 的数据
library(data.table) #data.table_1.9.5
rbindlist(list(a,b))
# a b
# 1: 0.8403348 0.1579255
# 2: 0.4759767 0.8182902
# 3: 0.8091875 0.1080651
# 4: 0.9846333 0.7035959
# 5: 0.2153991 0.8744136
# 6: 0.7604137 0.9753853
# 7: 0.7553924 0.1210260
# 8: 0.7315970 0.6196829
# 9: 0.5619395 0.1120331
#10: 0.5711995 0.7252631
更新
基于12个数据集的对象名称(即'Goal1_Costo'、'Goal2_Costo'、...、'Goal12_Costo'),
nm1 <- paste(paste0('Goal', 1:12), 'Costo', sep="_")
#or using `sprintf`
#nm1 <- sprintf('%s%d_%s', 'Goal', 1:12, 'Costo')
rbindlist(mget(nm1))
这是一个可能的 tidyverse 解决方案。我根据您对数据框的描述创建了 3 个示例数据框。
df1 <- read.table(text ="date,source,medium,campaign,goal1Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),1,0.0000,Vida
2014-10-01,Master,email,CAFRE,2,0.0000,Vida
2014-10-01,apeseg,referral,(not set),3,0.0000,vida",sep = ",",header=TRUE)
df2 <- read.table(text ="date,source,medium,campaign,goal2Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),4,0.0000,Vida
2014-10-01,Master,email,CAFRE,5,0.0000,Vida
2014-10-01,apeseg,referral,(not set),6,0.0000,vida",sep = ",",header=TRUE)
df3 <- read.table(text ="date,source,medium,campaign,goal3Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),7,0.0000,Vida
2014-10-01,Master,email,CAFRE,8,0.0000,Vida
2014-10-01,apeseg,referral,(not set),9,0.0000,vida",sep = ",",header=TRUE)
> df1
date source medium campaign goal1Completions ad.cost Goal
1 2014-10-01 (direct) (none) (notset) 1 0 Vida
2 2014-10-01 Master email CAFRE 2 0 Vida
3 2014-10-01 apeseg referral (not set) 3 0 vida
> df2
date source medium campaign goal2Completions ad.cost Goal
1 2014-10-01 (direct) (none) (notset) 4 0 Vida
2 2014-10-01 Master email CAFRE 5 0 Vida
3 2014-10-01 apeseg referral (not set) 6 0 vida
> df3
date source medium campaign goal3Completions ad.cost Goal
1 2014-10-01 (direct) (none) (notset) 7 0 Vida
2 2014-10-01 Master email CAFRE 8 0 Vida
3 2014-10-01 apeseg referral (not set) 9 0 vida
library(dplyr)
library(tidyselect)
library(purrr)
# Stack the frames (bind_rows() fills columns a frame lacks with NA), then
# collapse the goal<N>Completions columns into a single column by taking,
# per row, the first non-missing value, and finally drop the per-goal columns.
# select() with a tidyselect helper replaces the superseded select_at()/vars().
bind_rows(df1, df2, df3) %>%
  mutate(
    goalCompletions = reduce(
      select(., matches("goal[[:digit:]]+Completions")),
      coalesce
    )
  ) %>%
  select(-matches("goal[[:digit:]]+Completions"))
date source medium campaign ad.cost Goal goalCompletions
1 2014-10-01 (direct) (none) (notset) 0 Vida 1
2 2014-10-01 Master email CAFRE 0 Vida 2
3 2014-10-01 apeseg referral (not set) 0 vida 3
4 2014-10-01 (direct) (none) (notset) 0 Vida 4
5 2014-10-01 Master email CAFRE 0 Vida 5
6 2014-10-01 apeseg referral (not set) 0 vida 6
7 2014-10-01 (direct) (none) (notset) 0 Vida 7
8 2014-10-01 Master email CAFRE 0 Vida 8
9 2014-10-01 apeseg referral (not set) 0 vida 9
另一种基础 R 方法,适用于一个由 data.frame 组成的列表,
且其中各数据框的列名不尽相同:
# Create a list of data frames
df_list <- list()
df_list[[1]] <- data.frame(x = 1, y = paste0("y1", 1:3))
df_list[[2]] <- data.frame(x = 2, y = paste0("y2", 1:4))
df_list[[3]] <- data.frame(x = 3, y = paste0("y3", 1:5), z = "z3")
df_list
#> [[1]]
#> x y
#> 1 1 y11
#> 2 1 y12
#> 3 1 y13
#>
#> [[2]]
#> x y
#> 1 2 y21
#> 2 2 y22
#> 3 2 y23
#> 4 2 y24
#>
#> [[3]]
#> x y z
#> 1 3 y31 z3
#> 2 3 y32 z3
#> 3 3 y33 z3
#> 4 3 y34 z3
#> 5 3 y35 z3
# This works when the column names are the same
do.call(rbind, df_list[1:2])
#> x y
#> 1 1 y11
#> 2 1 y12
#> 3 1 y13
#> 4 2 y21
#> 5 2 y22
#> 6 2 y23
#> 7 2 y24
# but fails when the column names differ
do.call(rbind, df_list)
#> Error in rbind(deparse.level, ...): numbers of columns of arguments do not match
# This can fill the unmatched columns with NA's without
# depending on other packages:
# Compute the union of all column names once (it does not depend on the
# element being processed), pad each data frame with NA columns for the
# names it lacks, then stack the padded frames pairwise with rbind().
all_cols <- unique(unlist(lapply(df_list, colnames)))
Reduce(rbind, lapply(df_list, function(x) {
  x[, setdiff(all_cols, names(x))] <- NA
  x
}))
#> x y z
#> 1 1 y11 <NA>
#> 2 1 y12 <NA>
#> 3 1 y13 <NA>
#> 4 2 y21 <NA>
#> 5 2 y22 <NA>
#> 6 2 y23 <NA>
#> 7 2 y24 <NA>
#> 8 3 y31 z3
#> 9 3 y32 z3
#> 10 3 y33 z3
#> 11 3 y34 z3
#> 12 3 y35 z3