为什么在具有 0 列的 data.frame 上调用 rbind 会删除所有行?

Why calling rbind on data.frame with 0 columns drops all the rows?

我注意到 matrixdata.frame 对象之间的 rbind 行为存在差异。

有了 matrix 个对象,一切都按预期工作:

mat1 <- matrix(nrow=2, ncol=0)
mat2 <- matrix(nrow=2, ncol=0)

dim(rbind(mat1, mat2))
[1] 4 0

但是如果我们突然将它们变成 data.frame 它会丢失行数:

> dim(rbind(as.data.frame(mat1), as.data.frame(mat2)))
[1] 0 0

我想了解的是 - 这种行为是故意的吗?如果是这样,在这种情况下减少行数的原因是什么?


编辑:如@PoGibas 所述 - 此行为记录在 ?rbind 中。没有给出原因,可能很难推断出原因。所以问题变成:

如何 rbind 任意数量的 data.frames 同时始终保留它们的行数?

解决方法是使用 cbind 和换位:

m <- matrix(nrow = 2, ncol = 0)
as.data.frame(t(cbind(as.data.frame(t(m)), as.data.frame(t(m)))))
# Returns: data frame with 0 columns and 4 rows

此处cbind创建一个0行4列的data.frame,我们将其转置为4行0列的矩阵。


另一种解决方案是对原始 base::rbind.data.frame (source on github) 函数进行粗暴修改。

你必须 remove/comment 出两部分:

  1. 如果长度不是正整数(length(data.frame()) returns 0),则删除参数。注释掉this part:

    allargs <- allargs[lengths(allargs) > 0L]

  2. Return of empty data.frame 如果属性名称为空(不能将属性设置为空 data.frame - names(as.data.frame(mat1)) <- "" returns 一个错误)。注释掉this part:

    if(nvar == 0L) return(structure(list(), class = "data.frame", row.names = integer()))


结果:

m <- matrix(nrow = 2, ncol = 0)
dim(rbind.data.frame2(as.data.frame(m), as.data.frame(m)))
# Returns: [1] 4 0

代码:

rbind.data.frame2 <- function(..., deparse.level = 1, make.row.names = TRUE,
                             stringsAsFactors = default.stringsAsFactors())
{
    match.names <- function(clabs, nmi)
    {
    if(identical(clabs, nmi)) NULL
    else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) {
            ## we need 1-1 matches here
        m <- pmatch(nmi, clabs, 0L)
            if(any(m == 0L))
                stop("names do not match previous names")
            m
    } else stop("names do not match previous names")
    }
    if(make.row.names)
    Make.row.names <- function(nmi, ri, ni, nrow)
    {
    if(nzchar(nmi)) {
            if(ni == 0L) character()  # PR8506
        else if(ni > 1L) paste(nmi, ri, sep = ".")
        else nmi
    }
    else if(nrow > 0L && identical(ri, seq_len(ni)) &&
        identical(unlist(rlabs, FALSE, FALSE), seq_len(nrow)))
        as.integer(seq.int(from = nrow + 1L, length.out = ni))
    else ri
    }
    allargs <- list(...)

    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    # allargs <- allargs[lengths(allargs) > 0L]
    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

    if(length(allargs)) {
        ## drop any zero-row data frames, as they may not have proper column
        ## types (e.g. NULL).
        nr <- vapply(allargs, function(x)
                     if(is.data.frame(x)) .row_names_info(x, 2L)
                     else if(is.list(x)) length(x[[1L]])
                    # mismatched lists are checked later
                     else length(x), 1L)
        if(any(nr > 0L)) allargs <- allargs[nr > 0L]
        else return(allargs[[1L]]) # pretty arbitrary
    }
    n <- length(allargs)
    if(n == 0L)
    return(structure(list(),
             class = "data.frame",
             row.names = integer()))
    nms <- names(allargs)
    if(is.null(nms))
    nms <- character(n)
    cl <- NULL
    perm <- rows <- vector("list", n)
    rlabs <- if(make.row.names) rows # else NULL
    nrow <- 0L
    value <- clabs <- NULL
    all.levs <- list()
    for(i in seq_len(n)) {
    ## check the arguments, develop row and column labels
    xi <- allargs[[i]]
    nmi <- nms[i]
        ## coerce matrix to data frame
        if(is.matrix(xi)) allargs[[i]] <- xi <-
            as.data.frame(xi, stringsAsFactors = stringsAsFactors)
    if(inherits(xi, "data.frame")) {
        if(is.null(cl))
        cl <- oldClass(xi)
        ri <- attr(xi, "row.names")
        ni <- length(ri)
        if(is.null(clabs)) ## first time
        clabs <- names(xi)
        else {
                if(length(xi) != length(clabs))
                    stop("numbers of columns of arguments do not match")
        pi <- match.names(clabs, names(xi))
        if( !is.null(pi) ) perm[[i]] <- pi
        }
        rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
        if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
        nrow <- nrow + ni
        if(is.null(value)) { ## first time ==> setup once:
        value <- unclass(xi)
        nvar <- length(value)
        all.levs <- vector("list", nvar)
        has.dim <- facCol <- ordCol <- logical(nvar)
        for(j in seq_len(nvar)) {
            xj <- value[[j]]
                    facCol[j] <-
                        if(!is.null(levels(xj))) {
                            all.levs[[j]] <- levels(xj)
                            TRUE # turn categories into factors
                        } else
                            is.factor(xj)
                    ordCol[j] <- is.ordered(xj)
            has.dim[j] <- length(dim(xj)) == 2L
        }
        }
        else for(j in seq_len(nvar)) {
                xij <- xi[[j]]
                if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j
                if(facCol[jj]) {
                    if(length(lij <- levels(xij))) {
                        all.levs[[jj]] <- unique(c(all.levs[[jj]], lij))
                        ordCol[jj] <- ordCol[jj] & is.ordered(xij)
                    } else if(is.character(xij))
                        all.levs[[jj]] <- unique(c(all.levs[[jj]], xij))
                }
            }
    }
    else if(is.list(xi)) {
        ni <- range(lengths(xi))
        if(ni[1L] == ni[2L])
        ni <- ni[1L]
        else stop("invalid list argument: all variables should have the same length")
        rows[[i]] <- ri <-
                as.integer(seq.int(from = nrow + 1L, length.out = ni))
        nrow <- nrow + ni
        if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
        if(length(nmi <- names(xi)) > 0L) {
        if(is.null(clabs))
            clabs <- nmi
        else {
                    if(length(xi) != length(clabs))
                        stop("numbers of columns of arguments do not match")
            pi <- match.names(clabs, nmi)
            if( !is.null(pi) ) perm[[i]] <- pi
        }
        }
    }
    else if(length(xi)) { # 1 new row
        rows[[i]] <- nrow <- nrow + 1L
            if(make.row.names)
        rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow)
    }
    }
    nvar <- length(clabs)
    if(nvar == 0L)
    nvar <- max(lengths(allargs)) # only vector args

    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    # if(nvar == 0L)
    # return(structure(list(), class = "data.frame",
    #          row.names = integer()))
    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

    pseq <- seq_len(nvar)
    if(is.null(value)) { # this happens if there has been no data frame
    value <- list()
    value[pseq] <- list(logical(nrow)) # OK for coercion except to raw.
        all.levs <- vector("list", nvar)
    has.dim <- facCol <- ordCol <- logical(nvar)
    }
    names(value) <- clabs
    for(j in pseq)
    if(length(lij <- all.levs[[j]]))
            value[[j]] <-
                factor(as.vector(value[[j]]), lij, ordered = ordCol[j])
    if(any(has.dim)) {
    rmax <- max(unlist(rows))
    for(i in pseq[has.dim])
        if(!inherits(xi <- value[[i]], "data.frame")) {
        dn <- dimnames(xi)
        rn <- dn[[1L]]
        if(length(rn) > 0L) length(rn) <- rmax
        pi <- dim(xi)[2L]
        length(xi) <- rmax * pi
        value[[i]] <- array(xi, c(rmax, pi), list(rn, dn[[2L]]))
        }
    }
    for(i in seq_len(n)) {
    xi <- unclass(allargs[[i]])
    if(!is.list(xi))
        if(length(xi) != nvar)
        xi <- rep(xi, length.out = nvar)
    ri <- rows[[i]]
    pi <- perm[[i]]
    if(is.null(pi)) pi <- pseq
    for(j in pseq) {
        jj <- pi[j]
            xij <- xi[[j]]
        if(has.dim[jj]) {
        value[[jj]][ri,  ] <- xij
                ## copy rownames
                rownames(value[[jj]])[ri] <- rownames(xij)
        } else {
                ## coerce factors to vectors, in case lhs is character or
                ## level set has changed
                value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij
                ## copy names if any
                if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm
            }
    }
    }
    if(make.row.names) {
    rlabs <- unlist(rlabs)
    if(anyDuplicated(rlabs))
        rlabs <- make.unique(as.character(rlabs), sep = "")
    }
    if(is.null(cl)) {
    as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE,
              stringsAsFactors = stringsAsFactors)
    } else {
    structure(value, class = cl,
          row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs)
    }
}