为什么在具有 0 列的 data.frame 上调用 rbind 会删除所有行?
Why calling rbind on data.frame with 0 columns drops all the rows?
我注意到 matrix
和 data.frame
对象之间的 rbind
行为存在差异。
有了 matrix
个对象,一切都按预期工作:
mat1 <- matrix(nrow=2, ncol=0)
mat2 <- matrix(nrow=2, ncol=0)
dim(rbind(mat1, mat2))
[1] 4 0
但是如果我们突然将它们变成 data.frame
它会丢失行数:
> dim(rbind(as.data.frame(mat1), as.data.frame(mat2)))
[1] 0 0
我想了解的是 - 这种行为是故意的吗?如果是这样,在这种情况下减少行数的原因是什么?
编辑:如@PoGibas 所述 - 此行为记录在 ?rbind
中。没有给出原因,可能很难推断出原因。所以问题变成:
如何 rbind
任意数量的 data.frames 同时始终保留它们的行数?
解决方法是使用 cbind
和换位:
m <- matrix(nrow = 2, ncol = 0)
as.data.frame(t(cbind(as.data.frame(t(m)), as.data.frame(t(m)))))
# Returns: data frame with 0 columns and 4 rows
此处cbind
创建一个0行4列的data.frame,我们将其转置为4行0列的矩阵。
另一种解决方案是对原始 base::rbind.data.frame
(source on github) 函数进行粗暴修改。
你必须 remove/comment 出两部分:
如果长度不是正整数(length(data.frame())
returns 0
),则删除参数。注释掉this part:
allargs <- allargs[lengths(allargs) > 0L]
Return of empty data.frame 如果属性名称为空(不能将属性设置为空 data.frame - names(as.data.frame(mat1)) <- ""
returns 一个错误)。注释掉this part:
if(nvar == 0L) return(structure(list(), class = "data.frame", row.names = integer()))
结果:
m <- matrix(nrow = 2, ncol = 0)
dim(rbind.data.frame2(as.data.frame(m), as.data.frame(m)))
# Returns: [1] 4 0
代码:
rbind.data.frame2 <- function(..., deparse.level = 1, make.row.names = TRUE,
stringsAsFactors = default.stringsAsFactors())
{
match.names <- function(clabs, nmi)
{
if(identical(clabs, nmi)) NULL
else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) {
## we need 1-1 matches here
m <- pmatch(nmi, clabs, 0L)
if(any(m == 0L))
stop("names do not match previous names")
m
} else stop("names do not match previous names")
}
if(make.row.names)
Make.row.names <- function(nmi, ri, ni, nrow)
{
if(nzchar(nmi)) {
if(ni == 0L) character() # PR8506
else if(ni > 1L) paste(nmi, ri, sep = ".")
else nmi
}
else if(nrow > 0L && identical(ri, seq_len(ni)) &&
identical(unlist(rlabs, FALSE, FALSE), seq_len(nrow)))
as.integer(seq.int(from = nrow + 1L, length.out = ni))
else ri
}
allargs <- list(...)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# allargs <- allargs[lengths(allargs) > 0L]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if(length(allargs)) {
## drop any zero-row data frames, as they may not have proper column
## types (e.g. NULL).
nr <- vapply(allargs, function(x)
if(is.data.frame(x)) .row_names_info(x, 2L)
else if(is.list(x)) length(x[[1L]])
# mismatched lists are checked later
else length(x), 1L)
if(any(nr > 0L)) allargs <- allargs[nr > 0L]
else return(allargs[[1L]]) # pretty arbitrary
}
n <- length(allargs)
if(n == 0L)
return(structure(list(),
class = "data.frame",
row.names = integer()))
nms <- names(allargs)
if(is.null(nms))
nms <- character(n)
cl <- NULL
perm <- rows <- vector("list", n)
rlabs <- if(make.row.names) rows # else NULL
nrow <- 0L
value <- clabs <- NULL
all.levs <- list()
for(i in seq_len(n)) {
## check the arguments, develop row and column labels
xi <- allargs[[i]]
nmi <- nms[i]
## coerce matrix to data frame
if(is.matrix(xi)) allargs[[i]] <- xi <-
as.data.frame(xi, stringsAsFactors = stringsAsFactors)
if(inherits(xi, "data.frame")) {
if(is.null(cl))
cl <- oldClass(xi)
ri <- attr(xi, "row.names")
ni <- length(ri)
if(is.null(clabs)) ## first time
clabs <- names(xi)
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, names(xi))
if( !is.null(pi) ) perm[[i]] <- pi
}
rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
nrow <- nrow + ni
if(is.null(value)) { ## first time ==> setup once:
value <- unclass(xi)
nvar <- length(value)
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
for(j in seq_len(nvar)) {
xj <- value[[j]]
facCol[j] <-
if(!is.null(levels(xj))) {
all.levs[[j]] <- levels(xj)
TRUE # turn categories into factors
} else
is.factor(xj)
ordCol[j] <- is.ordered(xj)
has.dim[j] <- length(dim(xj)) == 2L
}
}
else for(j in seq_len(nvar)) {
xij <- xi[[j]]
if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j
if(facCol[jj]) {
if(length(lij <- levels(xij))) {
all.levs[[jj]] <- unique(c(all.levs[[jj]], lij))
ordCol[jj] <- ordCol[jj] & is.ordered(xij)
} else if(is.character(xij))
all.levs[[jj]] <- unique(c(all.levs[[jj]], xij))
}
}
}
else if(is.list(xi)) {
ni <- range(lengths(xi))
if(ni[1L] == ni[2L])
ni <- ni[1L]
else stop("invalid list argument: all variables should have the same length")
rows[[i]] <- ri <-
as.integer(seq.int(from = nrow + 1L, length.out = ni))
nrow <- nrow + ni
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
if(length(nmi <- names(xi)) > 0L) {
if(is.null(clabs))
clabs <- nmi
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, nmi)
if( !is.null(pi) ) perm[[i]] <- pi
}
}
}
else if(length(xi)) { # 1 new row
rows[[i]] <- nrow <- nrow + 1L
if(make.row.names)
rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow)
}
}
nvar <- length(clabs)
if(nvar == 0L)
nvar <- max(lengths(allargs)) # only vector args
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# if(nvar == 0L)
# return(structure(list(), class = "data.frame",
# row.names = integer()))
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
pseq <- seq_len(nvar)
if(is.null(value)) { # this happens if there has been no data frame
value <- list()
value[pseq] <- list(logical(nrow)) # OK for coercion except to raw.
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
}
names(value) <- clabs
for(j in pseq)
if(length(lij <- all.levs[[j]]))
value[[j]] <-
factor(as.vector(value[[j]]), lij, ordered = ordCol[j])
if(any(has.dim)) {
rmax <- max(unlist(rows))
for(i in pseq[has.dim])
if(!inherits(xi <- value[[i]], "data.frame")) {
dn <- dimnames(xi)
rn <- dn[[1L]]
if(length(rn) > 0L) length(rn) <- rmax
pi <- dim(xi)[2L]
length(xi) <- rmax * pi
value[[i]] <- array(xi, c(rmax, pi), list(rn, dn[[2L]]))
}
}
for(i in seq_len(n)) {
xi <- unclass(allargs[[i]])
if(!is.list(xi))
if(length(xi) != nvar)
xi <- rep(xi, length.out = nvar)
ri <- rows[[i]]
pi <- perm[[i]]
if(is.null(pi)) pi <- pseq
for(j in pseq) {
jj <- pi[j]
xij <- xi[[j]]
if(has.dim[jj]) {
value[[jj]][ri, ] <- xij
## copy rownames
rownames(value[[jj]])[ri] <- rownames(xij)
} else {
## coerce factors to vectors, in case lhs is character or
## level set has changed
value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij
## copy names if any
if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm
}
}
}
if(make.row.names) {
rlabs <- unlist(rlabs)
if(anyDuplicated(rlabs))
rlabs <- make.unique(as.character(rlabs), sep = "")
}
if(is.null(cl)) {
as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE,
stringsAsFactors = stringsAsFactors)
} else {
structure(value, class = cl,
row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs)
}
}
我注意到 matrix
和 data.frame
对象之间的 rbind
行为存在差异。
有了 matrix
个对象,一切都按预期工作:
mat1 <- matrix(nrow=2, ncol=0)
mat2 <- matrix(nrow=2, ncol=0)
dim(rbind(mat1, mat2))
[1] 4 0
但是如果我们突然将它们变成 data.frame
它会丢失行数:
> dim(rbind(as.data.frame(mat1), as.data.frame(mat2)))
[1] 0 0
我想了解的是 - 这种行为是故意的吗?如果是这样,在这种情况下减少行数的原因是什么?
编辑:如@PoGibas 所述 - 此行为记录在 ?rbind
中。没有给出原因,可能很难推断出原因。所以问题变成:
如何 rbind
任意数量的 data.frames 同时始终保留它们的行数?
解决方法是使用 cbind
和换位:
m <- matrix(nrow = 2, ncol = 0)
as.data.frame(t(cbind(as.data.frame(t(m)), as.data.frame(t(m)))))
# Returns: data frame with 0 columns and 4 rows
此处cbind
创建一个0行4列的data.frame,我们将其转置为4行0列的矩阵。
另一种解决方案是对原始 base::rbind.data.frame
(source on github) 函数进行粗暴修改。
你必须 remove/comment 出两部分:
如果长度不是正整数(
length(data.frame())
returns0
),则删除参数。注释掉this part:allargs <- allargs[lengths(allargs) > 0L]
Return of empty data.frame 如果属性名称为空(不能将属性设置为空 data.frame -
names(as.data.frame(mat1)) <- ""
returns 一个错误)。注释掉this part:if(nvar == 0L) return(structure(list(), class = "data.frame", row.names = integer()))
结果:
m <- matrix(nrow = 2, ncol = 0)
dim(rbind.data.frame2(as.data.frame(m), as.data.frame(m)))
# Returns: [1] 4 0
代码:
rbind.data.frame2 <- function(..., deparse.level = 1, make.row.names = TRUE,
stringsAsFactors = default.stringsAsFactors())
{
match.names <- function(clabs, nmi)
{
if(identical(clabs, nmi)) NULL
else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) {
## we need 1-1 matches here
m <- pmatch(nmi, clabs, 0L)
if(any(m == 0L))
stop("names do not match previous names")
m
} else stop("names do not match previous names")
}
if(make.row.names)
Make.row.names <- function(nmi, ri, ni, nrow)
{
if(nzchar(nmi)) {
if(ni == 0L) character() # PR8506
else if(ni > 1L) paste(nmi, ri, sep = ".")
else nmi
}
else if(nrow > 0L && identical(ri, seq_len(ni)) &&
identical(unlist(rlabs, FALSE, FALSE), seq_len(nrow)))
as.integer(seq.int(from = nrow + 1L, length.out = ni))
else ri
}
allargs <- list(...)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# allargs <- allargs[lengths(allargs) > 0L]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if(length(allargs)) {
## drop any zero-row data frames, as they may not have proper column
## types (e.g. NULL).
nr <- vapply(allargs, function(x)
if(is.data.frame(x)) .row_names_info(x, 2L)
else if(is.list(x)) length(x[[1L]])
# mismatched lists are checked later
else length(x), 1L)
if(any(nr > 0L)) allargs <- allargs[nr > 0L]
else return(allargs[[1L]]) # pretty arbitrary
}
n <- length(allargs)
if(n == 0L)
return(structure(list(),
class = "data.frame",
row.names = integer()))
nms <- names(allargs)
if(is.null(nms))
nms <- character(n)
cl <- NULL
perm <- rows <- vector("list", n)
rlabs <- if(make.row.names) rows # else NULL
nrow <- 0L
value <- clabs <- NULL
all.levs <- list()
for(i in seq_len(n)) {
## check the arguments, develop row and column labels
xi <- allargs[[i]]
nmi <- nms[i]
## coerce matrix to data frame
if(is.matrix(xi)) allargs[[i]] <- xi <-
as.data.frame(xi, stringsAsFactors = stringsAsFactors)
if(inherits(xi, "data.frame")) {
if(is.null(cl))
cl <- oldClass(xi)
ri <- attr(xi, "row.names")
ni <- length(ri)
if(is.null(clabs)) ## first time
clabs <- names(xi)
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, names(xi))
if( !is.null(pi) ) perm[[i]] <- pi
}
rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
nrow <- nrow + ni
if(is.null(value)) { ## first time ==> setup once:
value <- unclass(xi)
nvar <- length(value)
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
for(j in seq_len(nvar)) {
xj <- value[[j]]
facCol[j] <-
if(!is.null(levels(xj))) {
all.levs[[j]] <- levels(xj)
TRUE # turn categories into factors
} else
is.factor(xj)
ordCol[j] <- is.ordered(xj)
has.dim[j] <- length(dim(xj)) == 2L
}
}
else for(j in seq_len(nvar)) {
xij <- xi[[j]]
if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j
if(facCol[jj]) {
if(length(lij <- levels(xij))) {
all.levs[[jj]] <- unique(c(all.levs[[jj]], lij))
ordCol[jj] <- ordCol[jj] & is.ordered(xij)
} else if(is.character(xij))
all.levs[[jj]] <- unique(c(all.levs[[jj]], xij))
}
}
}
else if(is.list(xi)) {
ni <- range(lengths(xi))
if(ni[1L] == ni[2L])
ni <- ni[1L]
else stop("invalid list argument: all variables should have the same length")
rows[[i]] <- ri <-
as.integer(seq.int(from = nrow + 1L, length.out = ni))
nrow <- nrow + ni
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
if(length(nmi <- names(xi)) > 0L) {
if(is.null(clabs))
clabs <- nmi
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, nmi)
if( !is.null(pi) ) perm[[i]] <- pi
}
}
}
else if(length(xi)) { # 1 new row
rows[[i]] <- nrow <- nrow + 1L
if(make.row.names)
rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow)
}
}
nvar <- length(clabs)
if(nvar == 0L)
nvar <- max(lengths(allargs)) # only vector args
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# if(nvar == 0L)
# return(structure(list(), class = "data.frame",
# row.names = integer()))
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
pseq <- seq_len(nvar)
if(is.null(value)) { # this happens if there has been no data frame
value <- list()
value[pseq] <- list(logical(nrow)) # OK for coercion except to raw.
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
}
names(value) <- clabs
for(j in pseq)
if(length(lij <- all.levs[[j]]))
value[[j]] <-
factor(as.vector(value[[j]]), lij, ordered = ordCol[j])
if(any(has.dim)) {
rmax <- max(unlist(rows))
for(i in pseq[has.dim])
if(!inherits(xi <- value[[i]], "data.frame")) {
dn <- dimnames(xi)
rn <- dn[[1L]]
if(length(rn) > 0L) length(rn) <- rmax
pi <- dim(xi)[2L]
length(xi) <- rmax * pi
value[[i]] <- array(xi, c(rmax, pi), list(rn, dn[[2L]]))
}
}
for(i in seq_len(n)) {
xi <- unclass(allargs[[i]])
if(!is.list(xi))
if(length(xi) != nvar)
xi <- rep(xi, length.out = nvar)
ri <- rows[[i]]
pi <- perm[[i]]
if(is.null(pi)) pi <- pseq
for(j in pseq) {
jj <- pi[j]
xij <- xi[[j]]
if(has.dim[jj]) {
value[[jj]][ri, ] <- xij
## copy rownames
rownames(value[[jj]])[ri] <- rownames(xij)
} else {
## coerce factors to vectors, in case lhs is character or
## level set has changed
value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij
## copy names if any
if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm
}
}
}
if(make.row.names) {
rlabs <- unlist(rlabs)
if(anyDuplicated(rlabs))
rlabs <- make.unique(as.character(rlabs), sep = "")
}
if(is.null(cl)) {
as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE,
stringsAsFactors = stringsAsFactors)
} else {
structure(value, class = cl,
row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs)
}
}