输出对象的 class 随输入数据的不同而不同

Question

我试图在 n 次尝试中的每一次抽取数量可变的样本。在这个例子中 n = 8，因为 length(n.obs) == 8。抽取完所有样本后，我想将它们组合成一个 matrix。

这是我的第一次尝试：

# Draw n.obs[i] values from 1:4 (with replacement, unequal weights) for each
# attempt. sapply() picks the container type of the result from the sizes.
set.seed(1234)
n.obs <- c(2, 1, 2, 2, 2, 2, 2, 2)
my.samples <- sapply(seq_along(n.obs), function(i) {
  sample(1:4, size = n.obs[i], prob = c(0.1, 0.2, 0.3, 0.4), replace = TRUE)
})
my.samples

这种方法产生 list.

# Inspect which container type my.samples ended up with.
class(my.samples)
#[1] "list"

我使用以下方法确定输出 matrix 中所需的列数：

# The longest sample decides how many columns the output matrix needs.
max.len <- max(lengths(my.samples))
max.len
#[1] 2

可以使用以下方法创建输出 matrix：

 # Right-pad every sample with NA so that all of them have max.len entries.
 corrected.list <- lapply(my.samples, function(smp) {
   n.missing <- max.len - length(smp)
   c(smp, rep(NA, n.missing))
 })
 # Stack the padded samples row-wise, then turn the NA padding into 0.
 output.matrix <- do.call(rbind, corrected.list)
 output.matrix <- replace(output.matrix, is.na(output.matrix), 0)
 output.matrix
 #     [,1] [,2]
 #[1,]    4    3
 #[2,]    3    0
 #[3,]    3    2
 #[4,]    3    4
 #[5,]    4    3
 #[6,]    3    3
 #[7,]    3    4
 #[8,]    1    4

只要 n.obs 包含多个不同的值，并且其中至少有一个元素大于 1，上述方法似乎就能正常工作。但是，我希望代码足够灵活，能够处理以下每一种 n.obs：

使用下面的 n.obs 时，上面的 sapply 语句返回一个 2 x 8 的 matrix。

# All eight entries of n.obs are 2.
set.seed(1234)
n.obs <- rep(2, times = 8)

使用下面的 n.obs 时，上面的 sapply 语句返回一个 integer 向量。

# All eight entries of n.obs are 1.
set.seed(3333)
n.obs <- rep(1, times = 8)

使用下面的 n.obs 时，上面的 sapply 语句返回一个 list。

# All eight entries of n.obs are 0.
n.obs <- rep(0, times = 8)

以下是上述三个 n.obs 示例各自期望的结果：

# Expected 8 x 2 result, written one row per line.
desired.output <- rbind(
  c(4, 3),
  c(3, 3),
  c(2, 3),
  c(4, 4),
  c(3, 3),
  c(3, 3),
  c(4, 1),
  c(4, 2)
)

# Expected 8 x 1 result: a single column holding one value per row.
desired.output <- matrix(c(2, 3, 4, 2, 3, 4, 4, 1), nrow = 8)

# Expected 8 x 1 result consisting only of zeros.
desired.output <- matrix(0, nrow = 8, ncol = 1)

我该如何推广这段代码，使其无论输入什么样的 n.obs，都始终返回一个八行的 matrix？一种方法是使用一系列 if 语句来处理有问题的情况，但我认为可能有更简单、更高效的解决方案。

Answer 1

我们可以写一个函数：

# Draw a variable number of samples per attempt and return them as a matrix.
#
# For each element of `n.obs`, that many values are drawn from 1:4 (with
# replacement, probabilities 0.1/0.2/0.3/0.4) and stored in one row. Rows that
# are shorter than the widest sample are right-padded with 0.
#
# Args:
#   n.obs: numeric vector of non-negative sample sizes, one per attempt.
#
# Returns:
#   A numeric matrix with length(n.obs) rows and max(1, max(n.obs)) columns.
#   An all-zero `n.obs` gives a single column of zeros; an empty `n.obs` gives
#   a 0 x 1 matrix.
get_matrix <- function(n.obs) {
  n_rows <- length(n.obs)

  # lapply() always returns a list. sapply() would collapse equal-length
  # samples into a vector or matrix, so the result type depended on the input.
  samples <- lapply(n.obs, function(size) {
    sample(1:4, size = size, prob = c(0.1, 0.2, 0.3, 0.4), replace = TRUE)
  })

  # Keep at least one column so that all-zero (or empty) input still yields a
  # proper matrix; the leading 1L also guards max() against empty input.
  width <- max(1L, lengths(samples))

  # Indexing past the end of a vector pads with NA. seq_len() is used because
  # 1:0 would produce c(1, 0) rather than an empty index.
  padded <- vapply(samples, function(s) s[seq_len(width)], integer(width))

  # `padded` holds one sample per column, so fill the result row by row.
  mat <- matrix(padded, nrow = n_rows, ncol = width, byrow = TRUE)
  mat[is.na(mat)] <- 0
  mat
}

检查输出：

# No seed is set in this snippet, so the sampled values shown below will vary
# between runs; only the matrix shapes and the 0 padding are reproducible.

# Mixed sizes: the row with a single draw is padded with 0 in column 2.
get_matrix(c(2,1,2,2,2,2,2,2))

#     [,1] [,2]
#[1,]    1    4
#[2,]    4    0
#[3,]    4    3
#[4,]    4    4
#[5,]    4    2
#[6,]    4    3
#[7,]    4    4
#[8,]    4    4

# One draw per attempt: a single-column matrix.
get_matrix(c(1,1,1,1,1,1,1,1))

#     [,1]
#[1,]    4
#[2,]    4
#[3,]    3
#[4,]    4
#[5,]    2
#[6,]    4
#[7,]    1
#[8,]    4

# No draws at all: a single column of zeros.
get_matrix(c(0,0,0,0,0,0,0,0))
#     [,1]
#[1,]    0
#[2,]    0
#[3,]    0
#[4,]    0
#[5,]    0
#[6,]    0
#[7,]    0
#[8,]    0

Answer 2

您可以使用 Vectorize 将 sample 函数在 size= 参数上向量化。

# sample() vectorized over `size`: one independent draw per element of `size`.
# SIMPLIFY = FALSE keeps the result a list even when all sizes are equal. It is
# spelled out in full because `F` is an ordinary variable that can be rebound.
samplev <- Vectorize(sample, vectorize.args = "size", SIMPLIFY = FALSE)

将 samplev 包装到一个函数中，并在 lapply 中使用 `length<-` 把每个样本补齐到最大长度。

# Draw n.obs[i] values from 1:4 for every attempt i and bind them into a matrix.
#
# Args:
#   n.obs: numeric vector of non-negative sample sizes, one per attempt.
#   prob.: probability weights for the values 1:4, passed to sample().
#
# Returns:
#   A numeric matrix with length(n.obs) rows. It has max(n.obs) columns, with
#   shorter rows right-padded with 0. When no attempt draws anything (all sizes
#   are 0, or n.obs is empty) it is a single column of zeros.
FUN <- function(n.obs, prob. = c(.1, .2, .3, .4)) {
  draws <- samplev(1:4, size = n.obs, prob = prob., replace = TRUE)

  # The leading 0 keeps max() well-defined for empty input.
  width <- max(0, n.obs)
  if (width == 0) {
    # Nothing was drawn: return the zero column explicitly instead of testing
    # the dimensions of a zero-column rbind() result.
    return(matrix(0, nrow = length(n.obs), ncol = 1L))
  }

  # `length<-` extends each sample to `width`, filling the tail with NA.
  padded <- lapply(draws, `length<-`, width)
  s <- do.call(rbind, padded)
  s[is.na(s)] <- 0
  s
}

结果：

# The seed is set once here; the later calls continue the same RNG stream.
set.seed(1234)
# Mixed sizes: the row with a single draw is padded with 0 in column 2.
FUN(c(2,1,2,2,2,2,2,2))
#      [,1] [,2]
# [1,]    4    3
# [2,]    3    0
# [3,]    3    2
# [4,]    3    4
# [5,]    4    3
# [6,]    3    3
# [7,]    3    4
# [8,]    1    4

# Two draws per attempt: a full 8 x 2 matrix, no padding.
FUN(c(2,2,2,2,2,2,2,2))
#      [,1] [,2]
# [1,]    2    4
# [2,]    4    4
# [3,]    4    4
# [4,]    4    4
# [5,]    4    4
# [6,]    2    3
# [7,]    1    2
# [8,]    4    3
# One draw per attempt: a single-column matrix.
FUN(c(1,1,1,1,1,1,1,1))
#      [,1]
# [1,]    4
# [2,]    4
# [3,]    3
# [4,]    4
# [5,]    2
# [6,]    4
# [7,]    4
# [8,]    1

# No draws at all: a single column of zeros.
FUN(c(0,0,0,0,0,0,0,0))
#      [,1]
# [1,]    0
# [2,]    0
# [3,]    0
# [4,]    0
# [5,]    0
# [6,]    0
# [7,]    0
# [8,]    0

# Input of a different length: two rows, the 3-draw row padded to width 4.
FUN(c(3, 4))
#      [,1] [,2] [,3] [,4]
# [1,]    2    3    3    0
# [2,]    4    3    4    3

输出对象的 class 随输入数据的不同而不同

Class of output object differs as input data differs

r

sapply