将向量按预定义的列数打印到文件中,且不进行循环补齐(recycling)

Print a vector to file with predefined number of columns, without recycling

我有一个包含 13 个元素的向量 x

# Example data: 13 letters (a-i and l-o), a length that is not a multiple of 3
x <- letters[c(1:9, 12:15)]
x
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "l" "m" "n" "o" 

我想按以下格式将向量打印到文件中,即 3 列:

 a b c     
 d e f 
 g h i
 l m n
 o

我找不到任何直接的方式以这种方式打印它。

我尝试将向量转换为具有 3 列的矩阵:

# Fill a 3-column matrix row by row; the 13 values cannot fill all 15 cells,
# so R recycles values from the start of x and emits a warning
matrix(x, ncol = 3, byrow = TRUE)
#     [,1] [,2] [,3]
# [1,] "a"  "b"  "c" 
# [2,] "d"  "e"  "f" 
# [3,] "g"  "h"  "i" 
# [4,] "l"  "m"  "n" 
# [5,] "o"  "a"  "b" # <~~ "a" and "b" recycled
# Warning message:
#   In matrix(x, ncol = 3, byrow = TRUE) :
#   data length [13] is not a sub-multiple or multiple of the number of rows [5]

但是这个方法在矩阵的末尾循环使用(recycle)了 x 的前两个值(a 和 b),我不希望这样。

我们可以先将向量('v1')按每 3 个连续元素一组拆分成列表('lst'),然后使用 stringi 包中的 stri_list2matrix 将其转换为矩阵。分组可以通过 gl 完成,也可以使用 %/%(即 (seq_along(v1)-1)%/%3+1)。

library(stringi)
# gl() yields group labels 1,1,1,2,2,2,... cut to length(v1); split() then
# breaks v1 into a list of consecutive chunks of up to 3 elements
lst <- split(v1, as.numeric(gl(length(v1), 3, length(v1))))
# Bind the chunks as matrix rows; shorter chunks are padded with the fill value
stri_list2matrix(lst, byrow=TRUE, fill='')
#     [,1] [,2] [,3]
#[1,] "a"  "b"  "c" 
#[2,] "d"  "e"  "f" 
#[3,] "g"  "h"  "i" 
#[4,] "l"  "m"  "n" 
#[5,] "o"  ""   ""  

或者使用 base R:把元素个数少于最大长度的列表元素用 NA 补齐到最大长度。

# Extend every chunk to the longest chunk's length (`length<-` pads with NA),
# then transpose so that each chunk becomes one row. lengths() is the
# type-stable way to get the per-element lengths of a list.
t(sapply(lst, `length<-`, max(lengths(lst))))

数据

 # Input vector used by the stringi / base R snippets: 13 letters (a-i and l-o)
 v1 <- letters[c(1:9,12:15)]

一些可能性:

# Build the 3-column matrix; when length(x) is not a multiple of 3, R recycles
# values from the start of x into the trailing cells (and warns)
m <- matrix(x, ncol = 3, byrow = TRUE)

# Build a matrix of the positions of x with the same shape as "m"; recycled
# cells repeat a position that already appeared earlier.
# duplicated(..., MARGIN = 0) flags those repeats element-wise and keeps the
# matrix dimensions, so the result can index "m" directly.
# Overwrite the recycled cells with `""`.

m[duplicated(matrix(seq_along(x), ncol = 3, byrow = TRUE), MARGIN = 0)] <- ""
m
#     [,1] [,2] [,3]
# [1,] "a"  "b"  "c" 
# [2,] "d"  "e"  "f" 
# [3,] "g"  "h"  "i" 
# [4,] "l"  "m"  "n" 
# [5,] "o"  ""   ""

或者:

# Row index of each element: positions 1-3 -> row 1, 4-6 -> row 2, ...
rr <- ceiling(seq_along(x)/3)

# Column index of each element: running count (1, 2, 3) within its row
cc <- ave(rr, rr, FUN = seq_along)

# Matrix pre-filled with "" so cells that receive no value stay empty
m <- matrix("", nrow = max(rr), ncol = 3)

# Place each element of x at its (row, column) position via matrix indexing
m[cbind(rr, cc)] <- x
m
#     [,1] [,2] [,3]
# [1,] "a"  "b"  "c" 
# [2,] "d"  "e"  "f" 
# [3,] "g"  "h"  "i" 
# [4,] "l"  "m"  "n" 
# [5,] "o"  ""   ""

添加了基准测试

# stringi variant: cut x into consecutive chunks of up to 3 elements and bind
# the chunks as matrix rows, padding a short final chunk with "".
# Requires stri_list2matrix() from the stringi package to be attached.
a1 <- function(x) {
  n <- length(x)
  # gl() labels positions 1,1,1,2,2,2,...; its third argument cuts it to n
  chunk_id <- as.numeric(gl(n, 3, n))
  chunks <- split(x, chunk_id)
  stri_list2matrix(chunks, byrow = TRUE, fill = "")
}

# Base R variant: cut x into consecutive chunks of up to 3 elements and bind
# the chunks as matrix rows, padding a short final chunk with NA.
#
# x: an atomic vector.
# Returns a matrix with exactly 3 columns and ceiling(length(x) / 3) rows;
# row names are the chunk numbers ("1", "2", ...).
a2 <- function(x){
  # Empty input has no chunks; return an empty 3-column matrix rather than
  # failing when taking the maximum chunk length of an empty list
  if (length(x) == 0L) {
    return(matrix(x, nrow = 0L, ncol = 3L))
  }
  lst <- split(x, as.numeric(gl(length(x), 3, length(x))))
  # `length<-` pads each chunk with NA up to 3 elements. vapply() with a
  # length-3 template always yields a 3-row matrix, so the result has 3
  # columns even when x has fewer than 3 elements.
  padded <- vapply(lst, `length<-`, vector(typeof(x), 3L), 3L)
  t(padded)
}

# Recycle-then-blank variant: let matrix() recycle x into a 3-column matrix,
# then overwrite the recycled cells with "".
# Note: when length(x) is not a multiple of 3, both matrix() calls emit the
# usual recycling warning.
h1 <- function(x) {
  filled <- matrix(x, ncol = 3, byrow = TRUE)
  # Same-shaped matrix of positions of x; recycled cells repeat a position
  positions <- matrix(seq_along(x), ncol = 3, byrow = TRUE)
  # MARGIN = 0 flags repeats element-wise and keeps the matrix dimensions
  is_recycled <- duplicated(positions, MARGIN = 0)
  filled[is_recycled] <- ""
  filled
}

# Index-assignment variant: compute the (row, column) position of every
# element of x and write x into a 3-column matrix pre-filled with "".
#
# x: an atomic vector (values are coerced to character in the result).
# Returns a character matrix with 3 columns and ceiling(length(x) / 3) rows;
# cells past the end of x contain "".
h2 <- function(x){
  # Empty input: max(rr) would be -Inf and matrix() would fail, so return an
  # empty 3-column character matrix directly
  if (length(x) == 0L) {
    return(matrix("", nrow = 0L, ncol = 3L))
  }
  # Row index: positions 1-3 -> row 1, 4-6 -> row 2, ...
  rr <- ceiling(seq_along(x)/3)
  # Column index: running count within each row
  cc <- ave(rr, rr, FUN = seq_along)
  m <- matrix("", nrow = max(rr), ncol = 3)
  # A two-column index matrix addresses individual (row, column) cells
  m[cbind(rr, cc)] <- x
  m
}

# Small input: 13 random letters, the same length as the example vector
# (no seed is set, so the sampled letters differ between runs)
x <- sample(letters, 13, replace = TRUE)
library(microbenchmark)
# Time each of the four implementations, 5 evaluations each
microbenchmark(
  a1(x),
  a2(x),
  h1(x),
  h2(x),
  times = 5)

# Unit: microseconds
#  expr     min      lq     mean  median      uq     max neval cld
# a1(x) 135.708 140.270 171.5926 144.451 162.317 275.217     5 a  
# a2(x) 270.276 270.655 277.2696 271.035 283.960 290.422     5  b 
# h1(x) 191.968 217.436 246.4028 224.659 225.420 372.531     5 ab 
# h2(x) 408.264 409.784 425.2176 411.685 412.445 483.910     5   c

# Large input: one million random letters, to compare how the approaches scale
x <- sample(letters, 1e6, replace = TRUE)
# Same comparison as for the small input, 5 evaluations each
microbenchmark(
  a1(x),
  a2(x),
  h1(x),
  h2(x),
  times = 5)

# Unit: milliseconds
#  expr        min         lq       mean    median       uq       max neval cld
# a1(x) 2406.03363 2411.56990 2554.72548 2606.9757 2623.604 2725.4443     5  b 
# a2(x) 4266.47556 4292.69452 4510.61242 4513.6833 4653.349 4826.8594     5   c
# h1(x)   76.51557   76.79041   91.24598   78.8207  101.336  122.7672     5 a  
# h2(x) 2419.89711 2570.22968 2636.08654 2663.2898 2751.662 2775.3540     5  b

因此,对于小向量,a1 最快;对于较大的向量,h1 比其他方法快约 25 倍以上。