如何使用 R 行绑定具有不同列数的两个数据集
How to rowbind two datasets with different number of columns using R
我正在尝试对具有不同列数的两个 xts
数据集进行行绑定:
数据集 1:
# Dataset 1: a 6 x 7 object of class c("xts", "zoo") spelled out with
# structure(). The index holds six numeric time stamps tagged as POSIXct and
# the seven columns carry company names.
one <- structure(c(0, 0.009009, 0.008929, -0.00885, 0, -0.017857, -0.027957,
-0.00885, -0.013393, -0.024887, 0.00232, -0.009259, 0, 0, 0,
0, 0, 0, -0.017794, 0.028986, -0.007143, 0.007194, 0.021429,
0.017483, 0, 0, 0, 0, 0, 0, 0.007968, -0.011858, 0, -0.032, -0.008264,
0.045833, 0.015924, 0.00627, -0.003115, 0, 0.00625, 0.024845), class = c("xts",
"zoo"), .indexCLASS = c("POSIXt", "POSIXct"), tclass = c("POSIXt",
"POSIXct"), tzone = "", index = c(346406400, 346492800, 346665600,
346924800, 347011200, 347097600), .Dim = 6:7, .Dimnames = list(
NULL, c("ALLEGHENY.POWER.SYSTEMS.INC", "ALLIED.CHEMICAL.CORP",
"APPLICATION.ENGR.CORP", "ALLIS.CHALMERS.CORP", "AMERICAN.ELECTR.LABS.INC",
"A.E.L.INDUSTRIES.INC", "AMAX.INC")))
数据集 2
# Dataset 2: a 6 x 6 object of class c("xts", "zoo") spelled out with
# structure(). Its six time stamps are all later than those of `one`, and only
# "ALLEGHENY.POWER.SYSTEMS.INC" and "ALLIED.CHEMICAL.CORP" are column names it
# shares with `one`.
two <- structure(c(0, 0, 0, 0, 0, 0, 0.071429, 0.066667, 0, -0.125,
0, 0, 0.018182, 0.026786, 0, 0.008696, -0.025862, -0.017699,
0.009346, 0.006944, 0.011494, -0.045455, -0.028571, 0.014706,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), class = c("xts", "zoo"), .indexCLASS = c("POSIXt",
"POSIXct"), tclass = c("POSIXt", "POSIXct"), tzone = "", index = c(347270400,
347529600, 347616000, 347702400, 347788800, 347875200), .Dim = c(6L,
6L), .Dimnames = list(NULL, c("A.C.S.ENTERPRISE.INC", "A.C.S.INDUSTRIES.INC",
"ALLEGHENY.POWER.SYSTEMS.INC", "ALLIED.CHEMICAL.CORP", "ALLIED.CORP",
"ALLIED.SIGNAL.INC")))
我已尝试 rbind(one,two, by=colnames(one))
但出现以下错误:
Error in rbind(deparse.level, ...) :
data must have same number of columns to bind by row
基本上,我想对这两个 xts 对象进行行绑定,并为每个对象补上它缺少的列,用 0 填充。
期望的输出:
# Desired result: a 12 x 11 object of class c("xts", "zoo") whose index is the
# six time stamps of `one` followed by the six of `two`.
# NOTE(review): the column list contains "ALLIED.CHEMICAL.CORP.1" and no
# "ALLIED.CORP", although `two` names that column "ALLIED.CORP" -- confirm
# which name is intended.
DES <- structure(c(0, 0.009009, 0.008929, -0.00885, 0, -0.017857, 0.018182,
0.026786, 0, 0.008696, -0.025862, -0.017699, -0.027957, -0.00885,
-0.013393, -0.024887, 0.00232, -0.009259, 0.009346, 0.006944,
0.011494, -0.045455, -0.028571, 0.014706, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, -0.017794, 0.028986, -0.007143, 0.007194, 0.021429,
0.017483, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0.007968, -0.011858, 0, -0.032, -0.008264, 0.045833, 0, 0,
0, 0, 0, 0, 0.015924, 0.00627, -0.003115, 0, 0.00625, 0.024845,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.071429, 0.066667, 0, -0.125, 0, 0, 0, 0, 0, 0, 0,
0, 0.009346, 0.006944, 0.011494, -0.045455, -0.028571, 0.014706,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(12L, 11L), .Dimnames = list(
NULL, c("ALLEGHENY.POWER.SYSTEMS.INC", "ALLIED.CHEMICAL.CORP",
"APPLICATION.ENGR.CORP", "ALLIS.CHALMERS.CORP", "AMERICAN.ELECTR.LABS.INC",
"A.E.L.INDUSTRIES.INC", "AMAX.INC", "A.C.S.ENTERPRISE.INC",
"A.C.S.INDUSTRIES.INC", "ALLIED.CHEMICAL.CORP.1", "ALLIED.SIGNAL.INC"
)), index = c(346406400, 346492800, 346665600, 346924800,
347011200, 347097600, 347270400, 347529600, 347616000, 347702400,
347788800, 347875200), class = c("xts", "zoo"), .indexCLASS = c("POSIXt",
"POSIXct"), tclass = c("POSIXt", "POSIXct"), tzone = "")
一种方法是创建一个尺寸合适的新矩阵('m1'):'m1' 的行数(nrow)是 'one' 与 'two' 的行数之和,列数(ncol)是两个数据集中所有不重复列名的个数。先构造几组列名索引:只出现在其中一个数据集中的列名('onenm'、'twonm')、两个数据集全部不重复的列名('nm2'),以及两个数据集共有的列名('nm1')。然后利用相应的行/列索引,把 'one' 和 'two' 中的元素赋值给新建的 xts 数据集(由 'm1' 创建的 'xt1')。
# Column names shared by both objects.
nm1 <- intersect(colnames(one), colnames(two))
# Column names that occur only in `one` / only in `two`.
onenm <- setdiff(colnames(one), colnames(two))
twonm <- setdiff(colnames(two), colnames(one))
# Every distinct column name across both objects.
nm2 <- union(colnames(one), colnames(two))
# Zero-filled template: one row per row of either input, one column per name.
m1 <- matrix(0, nrow=nrow(one)+nrow(two), ncol=length(nm2),
dimnames=list(NULL, nm2))
# Turn the template into an xts object ordered by both inputs' time stamps.
xt1 <- xts(m1, order.by=c(index(one), index(two)))
# Copy the columns unique to each input into the rows selected by that input's
# time stamps; cells that are never assigned keep the template's 0.
xt1[index(one), onenm] <- one[,onenm]
xt1[index(two), twonm] <- two[,twonm]
# The shared columns are filled from both inputs stacked row-wise.
xt1[,nm1] <- rbind(one[,nm1], two[,nm1])
dim(xt1)
#[1] 12 11
更新
您也可以使用 data.table
中的 rbindlist
(或 dplyr
中的 bind_rows
)。将 xts
对象转换为“data.frame”,将其放入列表中并使用 rbindlist
和 fill=TRUE
选项。将输出 ('dt1') 转换为 xts
('xt2'),并将“NA”值替换为“0”。
library(data.table)
# Stack the two objects as data frames; fill=TRUE pads the columns missing
# from either input with NA.
dt1 <- rbindlist(list(as.data.frame(one),
as.data.frame(two)), fill=TRUE)
# Alternative with dplyr:
#library(dplyr)
#dt1 <- bind_rows(list(as.data.frame(one), as.data.frame(two)))
# Rebuild an xts object on the combined time stamps, then replace NA with 0.
xt2 <- xts(dt1, order.by=c(index(one), index(two)))
xt2[is.na(xt2)] <- 0
# Compare against `xt1`, the result of the matrix-template approach.
identical(xt1, xt2)
#[1] TRUE
除了 akrun 的出色回答,我在这里分享一个我一直用来执行两个 xts 的稳健 rbind 的函数:
#' Row-bind two matrix-like objects whose column sets differ.
#'
#' Columns that only one input has are appended to the other input, filled
#' with `fill`. The columns of `y` are then put in the column order of `x`
#' and the two objects are stacked with rbind().
#'
#' @param x,y Objects with column names that support cbind(), rbind() and
#'   `[` with `drop = FALSE` (e.g. xts objects or matrices). Either may be
#'   NULL.
#' @param fill Scalar placed in the columns an input lacks. Defaults to NA,
#'   which is what the function has always produced; pass 0 to get
#'   zero-filled columns directly.
#' @return `y` if `x` is NULL, `x` if `y` is NULL; otherwise the row-bound
#'   object with the columns of `x` first, followed by those only `y` has.
rbind.ordered <- function(x, y, fill = NA) {
  if (is.null(x)) return(y)
  if (is.null(y)) return(x)

  # Append one `fill` column per missing name and label the new columns.
  pad_columns <- function(obj, missing_cols) {
    if (length(missing_cols) == 0L) {
      return(obj)
    }
    kept <- colnames(obj)
    for (i in seq_along(missing_cols)) {
      obj <- cbind(obj, fill)
    }
    colnames(obj) <- c(kept, missing_cols)
    obj
  }

  # Pad `y` first: afterwards `y` holds every column of `x`, so the second
  # setdiff() yields exactly the columns that only `y` had originally.
  y <- pad_columns(y, setdiff(colnames(x), colnames(y)))
  x <- pad_columns(x, setdiff(colnames(y), colnames(x)))

  # drop = FALSE keeps a single-column `y` two-dimensional; without it a
  # plain matrix collapses to a vector and rbind() treats it as one row.
  rbind(x, y[, colnames(x), drop = FALSE])
}
rbind.ordered(one, two)
那么你只需要将 NA 替换为 0 就可以得到你想要的东西
我正在尝试对具有不同列数的两个 xts
数据集进行行绑定:
数据集 1:
# Dataset 1: a 6 x 7 object of class c("xts", "zoo") spelled out with
# structure(). The index holds six numeric time stamps tagged as POSIXct and
# the seven columns carry company names.
one <- structure(c(0, 0.009009, 0.008929, -0.00885, 0, -0.017857, -0.027957,
-0.00885, -0.013393, -0.024887, 0.00232, -0.009259, 0, 0, 0,
0, 0, 0, -0.017794, 0.028986, -0.007143, 0.007194, 0.021429,
0.017483, 0, 0, 0, 0, 0, 0, 0.007968, -0.011858, 0, -0.032, -0.008264,
0.045833, 0.015924, 0.00627, -0.003115, 0, 0.00625, 0.024845), class = c("xts",
"zoo"), .indexCLASS = c("POSIXt", "POSIXct"), tclass = c("POSIXt",
"POSIXct"), tzone = "", index = c(346406400, 346492800, 346665600,
346924800, 347011200, 347097600), .Dim = 6:7, .Dimnames = list(
NULL, c("ALLEGHENY.POWER.SYSTEMS.INC", "ALLIED.CHEMICAL.CORP",
"APPLICATION.ENGR.CORP", "ALLIS.CHALMERS.CORP", "AMERICAN.ELECTR.LABS.INC",
"A.E.L.INDUSTRIES.INC", "AMAX.INC")))
数据集 2
# Dataset 2: a 6 x 6 object of class c("xts", "zoo") spelled out with
# structure(). Its six time stamps are all later than those of `one`, and only
# "ALLEGHENY.POWER.SYSTEMS.INC" and "ALLIED.CHEMICAL.CORP" are column names it
# shares with `one`.
two <- structure(c(0, 0, 0, 0, 0, 0, 0.071429, 0.066667, 0, -0.125,
0, 0, 0.018182, 0.026786, 0, 0.008696, -0.025862, -0.017699,
0.009346, 0.006944, 0.011494, -0.045455, -0.028571, 0.014706,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), class = c("xts", "zoo"), .indexCLASS = c("POSIXt",
"POSIXct"), tclass = c("POSIXt", "POSIXct"), tzone = "", index = c(347270400,
347529600, 347616000, 347702400, 347788800, 347875200), .Dim = c(6L,
6L), .Dimnames = list(NULL, c("A.C.S.ENTERPRISE.INC", "A.C.S.INDUSTRIES.INC",
"ALLEGHENY.POWER.SYSTEMS.INC", "ALLIED.CHEMICAL.CORP", "ALLIED.CORP",
"ALLIED.SIGNAL.INC")))
我已尝试 rbind(one,two, by=colnames(one))
但出现以下错误:
Error in rbind(deparse.level, ...) :
data must have same number of columns to bind by row
基本上,我想对这两个 xts 对象进行行绑定,并为每个对象补上它缺少的列,用 0 填充。
期望的输出:
# Desired result: a 12 x 11 object of class c("xts", "zoo") whose index is the
# six time stamps of `one` followed by the six of `two`.
# NOTE(review): the column list contains "ALLIED.CHEMICAL.CORP.1" and no
# "ALLIED.CORP", although `two` names that column "ALLIED.CORP" -- confirm
# which name is intended.
DES <- structure(c(0, 0.009009, 0.008929, -0.00885, 0, -0.017857, 0.018182,
0.026786, 0, 0.008696, -0.025862, -0.017699, -0.027957, -0.00885,
-0.013393, -0.024887, 0.00232, -0.009259, 0.009346, 0.006944,
0.011494, -0.045455, -0.028571, 0.014706, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, -0.017794, 0.028986, -0.007143, 0.007194, 0.021429,
0.017483, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0.007968, -0.011858, 0, -0.032, -0.008264, 0.045833, 0, 0,
0, 0, 0, 0, 0.015924, 0.00627, -0.003115, 0, 0.00625, 0.024845,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.071429, 0.066667, 0, -0.125, 0, 0, 0, 0, 0, 0, 0,
0, 0.009346, 0.006944, 0.011494, -0.045455, -0.028571, 0.014706,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(12L, 11L), .Dimnames = list(
NULL, c("ALLEGHENY.POWER.SYSTEMS.INC", "ALLIED.CHEMICAL.CORP",
"APPLICATION.ENGR.CORP", "ALLIS.CHALMERS.CORP", "AMERICAN.ELECTR.LABS.INC",
"A.E.L.INDUSTRIES.INC", "AMAX.INC", "A.C.S.ENTERPRISE.INC",
"A.C.S.INDUSTRIES.INC", "ALLIED.CHEMICAL.CORP.1", "ALLIED.SIGNAL.INC"
)), index = c(346406400, 346492800, 346665600, 346924800,
347011200, 347097600, 347270400, 347529600, 347616000, 347702400,
347788800, 347875200), class = c("xts", "zoo"), .indexCLASS = c("POSIXt",
"POSIXct"), tclass = c("POSIXt", "POSIXct"), tzone = "")
一种方法是创建一个尺寸合适的新矩阵('m1'):'m1' 的行数(nrow)是 'one' 与 'two' 的行数之和,列数(ncol)是两个数据集中所有不重复列名的个数。先构造几组列名索引:只出现在其中一个数据集中的列名('onenm'、'twonm')、两个数据集全部不重复的列名('nm2'),以及两个数据集共有的列名('nm1')。然后利用相应的行/列索引,把 'one' 和 'two' 中的元素赋值给新建的 xts 数据集(由 'm1' 创建的 'xt1')。
# Column names shared by both objects.
nm1 <- intersect(colnames(one), colnames(two))
# Column names that occur only in `one` / only in `two`.
onenm <- setdiff(colnames(one), colnames(two))
twonm <- setdiff(colnames(two), colnames(one))
# Every distinct column name across both objects.
nm2 <- union(colnames(one), colnames(two))
# Zero-filled template: one row per row of either input, one column per name.
m1 <- matrix(0, nrow=nrow(one)+nrow(two), ncol=length(nm2),
dimnames=list(NULL, nm2))
# Turn the template into an xts object ordered by both inputs' time stamps.
xt1 <- xts(m1, order.by=c(index(one), index(two)))
# Copy the columns unique to each input into the rows selected by that input's
# time stamps; cells that are never assigned keep the template's 0.
xt1[index(one), onenm] <- one[,onenm]
xt1[index(two), twonm] <- two[,twonm]
# The shared columns are filled from both inputs stacked row-wise.
xt1[,nm1] <- rbind(one[,nm1], two[,nm1])
dim(xt1)
#[1] 12 11
更新
您也可以使用 data.table
中的 rbindlist
(或 dplyr
中的 bind_rows
)。将 xts
对象转换为“data.frame”,将其放入列表中并使用 rbindlist
和 fill=TRUE
选项。将输出 ('dt1') 转换为 xts
('xt2'),并将“NA”值替换为“0”。
library(data.table)
# Stack the two objects as data frames; fill=TRUE pads the columns missing
# from either input with NA.
dt1 <- rbindlist(list(as.data.frame(one),
as.data.frame(two)), fill=TRUE)
# Alternative with dplyr:
#library(dplyr)
#dt1 <- bind_rows(list(as.data.frame(one), as.data.frame(two)))
# Rebuild an xts object on the combined time stamps, then replace NA with 0.
xt2 <- xts(dt1, order.by=c(index(one), index(two)))
xt2[is.na(xt2)] <- 0
# Compare against `xt1`, the result of the matrix-template approach.
identical(xt1, xt2)
#[1] TRUE
除了 akrun 的出色回答,我在这里分享一个我一直用来执行两个 xts 的稳健 rbind 的函数:
#' Row-bind two matrix-like objects whose column sets differ.
#'
#' Columns that only one input has are appended to the other input, filled
#' with `fill`. The columns of `y` are then put in the column order of `x`
#' and the two objects are stacked with rbind().
#'
#' @param x,y Objects with column names that support cbind(), rbind() and
#'   `[` with `drop = FALSE` (e.g. xts objects or matrices). Either may be
#'   NULL.
#' @param fill Scalar placed in the columns an input lacks. Defaults to NA,
#'   which is what the function has always produced; pass 0 to get
#'   zero-filled columns directly.
#' @return `y` if `x` is NULL, `x` if `y` is NULL; otherwise the row-bound
#'   object with the columns of `x` first, followed by those only `y` has.
rbind.ordered <- function(x, y, fill = NA) {
  if (is.null(x)) return(y)
  if (is.null(y)) return(x)

  # Append one `fill` column per missing name and label the new columns.
  pad_columns <- function(obj, missing_cols) {
    if (length(missing_cols) == 0L) {
      return(obj)
    }
    kept <- colnames(obj)
    for (i in seq_along(missing_cols)) {
      obj <- cbind(obj, fill)
    }
    colnames(obj) <- c(kept, missing_cols)
    obj
  }

  # Pad `y` first: afterwards `y` holds every column of `x`, so the second
  # setdiff() yields exactly the columns that only `y` had originally.
  y <- pad_columns(y, setdiff(colnames(x), colnames(y)))
  x <- pad_columns(x, setdiff(colnames(y), colnames(x)))

  # drop = FALSE keeps a single-column `y` two-dimensional; without it a
  # plain matrix collapses to a vector and rbind() treats it as one row.
  rbind(x, y[, colnames(x), drop = FALSE])
}
rbind.ordered(one, two)
那么你只需要将 NA 替换为 0 就可以得到你想要的东西