将向量按预定义的列数打印到文件中,且不发生循环补齐(回收)
Print a vector to file with predefined number of columns, without recycling
我有一个包含 13 个元素的向量 x
:
x <- letters[c(1:9, 12:15)]
x
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "l" "m" "n" "o"
我想按以下格式将向量打印到文件中,即 3 列:
a b c
d e f
g h i
l m n
o
我找不到任何直接的方式以这种方式打印它。
我尝试将向量转换为具有 3 列的矩阵:
matrix(x, ncol = 3, byrow = TRUE)
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "d" "e" "f"
# [3,] "g" "h" "i"
# [4,] "l" "m" "n"
# [5,] "o" "a" "b" # <~~ "a" and "b" recycled
# Warning message:
# In matrix(x, ncol = 3, byrow = TRUE) :
# data length [13] is not a sub-multiple or multiple of the number of rows [5]
但是这种方法会在矩阵的末尾循环补齐(回收)x 的前两个值(a 和 b),而这不是我想要的。
我们可以先把向量('v1')按每 3 个连续元素一组拆分成列表("lst"),然后使用 stringi 包中的 stri_list2matrix 将其转换为矩阵。分组可以通过 gl 完成,也可以使用 %/%(即 (seq_along(v1)-1)%/%3+1)来完成。
# stringi provides stri_list2matrix().
library(stringi)
# Split v1 into consecutive groups of up to 3 elements.
# NOTE: v1 is defined further down in this answer, under the data heading;
# run that assignment first.
lst <- split(v1, as.numeric(gl(length(v1), 3, length(v1))))
# Bind the groups row by row into a matrix, padding short groups with "".
stri_list2matrix(lst, byrow=TRUE, fill='')
# [,1] [,2] [,3]
#[1,] "a" "b" "c"
#[2,] "d" "e" "f"
#[3,] "g" "h" "i"
#[4,] "l" "m" "n"
#[5,] "o" "" ""
或者只使用 base R:对元素个数少于最大长度的列表元素,用 NA 补齐到最大长度。
# Extend every group in lst to the length of the longest group (`length<-`
# pads with NA), then transpose so that each group becomes one row.
t(sapply(lst, `length<-`, max(sapply(lst, length))))
数据
v1 <- letters[c(1:9,12:15)]
一些可能性:
# Fill a 3-column matrix row by row. When length(x) is not a multiple of 3,
# matrix() warns and recycles values from the start of x into the last row.
m <- matrix(x, ncol = 3, byrow = TRUE)
# Build a matrix of the indices of x with the same dimensions as "m".
# A recycled cell repeats an index that has already occurred, so duplicated()
# flags it; MARGIN = 0 compares cell by cell and keeps the matrix dimensions.
# Overwrite the flagged (recycled) cells with `""`.
m[duplicated(matrix(seq_along(x), ncol = 3, byrow = TRUE), MARGIN = 0)] <- ""
# Show the result.
m
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "d" "e" "f"
# [3,] "g" "h" "i"
# [4,] "l" "m" "n"
# [5,] "o" "" ""
或者:
# Row index of every element: elements 1-3 go to row 1, 4-6 to row 2, ...
rr <- ceiling(seq_along(x)/3)
# Column index of every element: its running position within its own row.
cc <- ave(rr, rr, FUN = seq_along)
# Pre-allocate a matrix filled with "" that has one row per group.
m <- matrix("", nrow = max(rr), ncol = 3)
# Write each element of x into its (row, column) cell; cells that are never
# addressed keep the "" filler.
m[cbind(rr, cc)] <- x
# Show the result.
m
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "d" "e" "f"
# [3,] "g" "h" "i"
# [4,] "l" "m" "n"
# [5,] "o" "" ""
添加了基准测试
# a1: stringi-based variant.
#
# Cuts `x` into consecutive chunks of up to 3 elements and lets
# stri_list2matrix() stack the chunks as rows, filling the short last chunk
# with "". Requires the stringi package to be attached.
a1 <- function(x) {
  n <- length(x)
  # Chunk number of every element: 1, 1, 1, 2, 2, 2, ...
  chunk_id <- as.numeric(gl(n, 3, n))
  chunks <- split(x, chunk_id)
  stri_list2matrix(chunks, byrow = TRUE, fill = "")
}
# a2: base-R variant -- split `x` into consecutive groups of 3, pad each group
# with NA, and return one group per row.
#
# x: an atomic vector (the benchmark passes a character vector).
# Returns a matrix with 3 columns and ceiling(length(x) / 3) rows. Cells past
# the end of `x` hold NA, and the rows are named after the group numbers.
a2 <- function(x) {
  # Nothing to split: return the empty 0 x 3 result directly.
  if (length(x) == 0L) {
    return(matrix(x, nrow = 0L, ncol = 3L))
  }
  lst <- split(x, as.numeric(gl(length(x), 3, length(x))))
  # Pad every group to exactly 3 elements. vapply() always yields a
  # 3 x n_groups matrix, whereas sapply() collapses to a plain vector when
  # there is a single one-element group, and padding only to the longest
  # group gives fewer than 3 columns for inputs shorter than 3.
  padded <- vapply(lst, `length<-`, FUN.VALUE = vector(typeof(x), 3L), 3L)
  # Groups come back as columns; transpose so that each group is a row.
  t(padded)
}
# h1: recycle-then-blank variant.
#
# Lets matrix() lay `x` out in 3 columns row by row (recycling values into the
# incomplete last row), then blanks the recycled cells with "".
# Note: matrix() warns whenever length(x) is not a multiple of 3.
h1 <- function(x) {
  out <- matrix(x, ncol = 3, byrow = TRUE)
  # Same layout, but holding the positions of `x` instead of its values.
  positions <- matrix(seq_along(x), ncol = 3, byrow = TRUE)
  # A position that shows up a second time marks a recycled cell;
  # MARGIN = 0 checks cell by cell and keeps the matrix shape.
  recycled <- duplicated(positions, MARGIN = 0)
  out[recycled] <- ""
  out
}
# h2: index-pair variant.
#
# Pre-allocates a 3-column matrix filled with "" and writes every element of
# `x` into its (row, column) cell, so the unused cells of the last row keep
# the "" filler.
#
# x: a vector (the benchmark passes a character vector).
# Returns a character matrix with 3 columns and ceiling(length(x) / 3) rows.
h2 <- function(x) {
  # With empty input max(rr) would be -Inf and matrix() would fail, so return
  # the 0 x 3 result directly.
  if (length(x) == 0L) {
    return(matrix(character(0), nrow = 0L, ncol = 3L))
  }
  # Row index: elements 1-3 go to row 1, 4-6 to row 2, ...
  rr <- ceiling(seq_along(x) / 3)
  # Column index: running position of each element within its row.
  cc <- ave(rr, rr, FUN = seq_along)
  m <- matrix("", nrow = max(rr), ncol = 3)
  # A two-column index matrix addresses one (row, column) cell per element.
  m[cbind(rr, cc)] <- x
  m
}
# Small case: 13 letters drawn at random with replacement.
x <- sample(letters, 13, replace = TRUE)
library(microbenchmark)
# Time each of the four implementations 5 times on the same input.
microbenchmark(
a1(x),
a2(x),
h1(x),
h2(x),
times = 5)
# Unit: microseconds
# expr min lq mean median uq max neval cld
# a1(x) 135.708 140.270 171.5926 144.451 162.317 275.217 5 a
# a2(x) 270.276 270.655 277.2696 271.035 283.960 290.422 5 b
# h1(x) 191.968 217.436 246.4028 224.659 225.420 372.531 5 ab
# h2(x) 408.264 409.784 425.2176 411.685 412.445 483.910 5 c
# Large case: one million letters drawn at random with replacement.
x <- sample(letters, 1e6, replace = TRUE)
# Time each of the four implementations 5 times on the same input.
microbenchmark(
a1(x),
a2(x),
h1(x),
h2(x),
times = 5)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# a1(x) 2406.03363 2411.56990 2554.72548 2606.9757 2623.604 2725.4443 5 b
# a2(x) 4266.47556 4292.69452 4510.61242 4513.6833 4653.349 4826.8594 5 c
# h1(x) 76.51557 76.79041 91.24598 78.8207 101.336 122.7672 5 a
# h2(x) 2419.89711 2570.22968 2636.08654 2663.2898 2751.662 2775.3540 5 b
因此,对于小向量,a1 最快;对于较大的向量,h1 最快,比其他方法快 25 倍以上(中位数约 79 毫秒,而 a1 约 2607 毫秒)。
我有一个包含 13 个元素的向量 x
:
x <- letters[c(1:9, 12:15)]
x
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "l" "m" "n" "o"
我想按以下格式将向量打印到文件中,即 3 列:
a b c
d e f
g h i
l m n
o
我找不到任何直接的方式以这种方式打印它。
我尝试将向量转换为具有 3 列的矩阵:
matrix(x, ncol = 3, byrow = TRUE)
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "d" "e" "f"
# [3,] "g" "h" "i"
# [4,] "l" "m" "n"
# [5,] "o" "a" "b" # <~~ "a" and "b" recycled
# Warning message:
# In matrix(x, ncol = 3, byrow = TRUE) :
# data length [13] is not a sub-multiple or multiple of the number of rows [5]
但是这种方法会在矩阵的末尾循环补齐(回收)x 的前两个值(a 和 b),而这不是我想要的。
我们可以先把向量('v1')按每 3 个连续元素一组拆分成列表("lst"),然后使用 stringi 包中的 stri_list2matrix 将其转换为矩阵。分组可以通过 gl 完成,也可以使用 %/%(即 (seq_along(v1)-1)%/%3+1)来完成。
# stringi provides stri_list2matrix().
library(stringi)
# Split v1 into consecutive groups of up to 3 elements.
# NOTE: v1 is defined further down in this answer, under the data heading;
# run that assignment first.
lst <- split(v1, as.numeric(gl(length(v1), 3, length(v1))))
# Bind the groups row by row into a matrix, padding short groups with "".
stri_list2matrix(lst, byrow=TRUE, fill='')
# [,1] [,2] [,3]
#[1,] "a" "b" "c"
#[2,] "d" "e" "f"
#[3,] "g" "h" "i"
#[4,] "l" "m" "n"
#[5,] "o" "" ""
或者只使用 base R:对元素个数少于最大长度的列表元素,用 NA 补齐到最大长度。
# Extend every group in lst to the length of the longest group (`length<-`
# pads with NA), then transpose so that each group becomes one row.
t(sapply(lst, `length<-`, max(sapply(lst, length))))
数据
v1 <- letters[c(1:9,12:15)]
一些可能性:
# Fill a 3-column matrix row by row. When length(x) is not a multiple of 3,
# matrix() warns and recycles values from the start of x into the last row.
m <- matrix(x, ncol = 3, byrow = TRUE)
# Build a matrix of the indices of x with the same dimensions as "m".
# A recycled cell repeats an index that has already occurred, so duplicated()
# flags it; MARGIN = 0 compares cell by cell and keeps the matrix dimensions.
# Overwrite the flagged (recycled) cells with `""`.
m[duplicated(matrix(seq_along(x), ncol = 3, byrow = TRUE), MARGIN = 0)] <- ""
# Show the result.
m
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "d" "e" "f"
# [3,] "g" "h" "i"
# [4,] "l" "m" "n"
# [5,] "o" "" ""
或者:
# Row index of every element: elements 1-3 go to row 1, 4-6 to row 2, ...
rr <- ceiling(seq_along(x)/3)
# Column index of every element: its running position within its own row.
cc <- ave(rr, rr, FUN = seq_along)
# Pre-allocate a matrix filled with "" that has one row per group.
m <- matrix("", nrow = max(rr), ncol = 3)
# Write each element of x into its (row, column) cell; cells that are never
# addressed keep the "" filler.
m[cbind(rr, cc)] <- x
# Show the result.
m
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "d" "e" "f"
# [3,] "g" "h" "i"
# [4,] "l" "m" "n"
# [5,] "o" "" ""
添加了基准测试
# a1: stringi-based variant.
#
# Cuts `x` into consecutive chunks of up to 3 elements and lets
# stri_list2matrix() stack the chunks as rows, filling the short last chunk
# with "". Requires the stringi package to be attached.
a1 <- function(x) {
  n <- length(x)
  # Chunk number of every element: 1, 1, 1, 2, 2, 2, ...
  chunk_id <- as.numeric(gl(n, 3, n))
  chunks <- split(x, chunk_id)
  stri_list2matrix(chunks, byrow = TRUE, fill = "")
}
# a2: base-R variant -- split `x` into consecutive groups of 3, pad each group
# with NA, and return one group per row.
#
# x: an atomic vector (the benchmark passes a character vector).
# Returns a matrix with 3 columns and ceiling(length(x) / 3) rows. Cells past
# the end of `x` hold NA, and the rows are named after the group numbers.
a2 <- function(x) {
  # Nothing to split: return the empty 0 x 3 result directly.
  if (length(x) == 0L) {
    return(matrix(x, nrow = 0L, ncol = 3L))
  }
  lst <- split(x, as.numeric(gl(length(x), 3, length(x))))
  # Pad every group to exactly 3 elements. vapply() always yields a
  # 3 x n_groups matrix, whereas sapply() collapses to a plain vector when
  # there is a single one-element group, and padding only to the longest
  # group gives fewer than 3 columns for inputs shorter than 3.
  padded <- vapply(lst, `length<-`, FUN.VALUE = vector(typeof(x), 3L), 3L)
  # Groups come back as columns; transpose so that each group is a row.
  t(padded)
}
# h1: recycle-then-blank variant.
#
# Lets matrix() lay `x` out in 3 columns row by row (recycling values into the
# incomplete last row), then blanks the recycled cells with "".
# Note: matrix() warns whenever length(x) is not a multiple of 3.
h1 <- function(x) {
  out <- matrix(x, ncol = 3, byrow = TRUE)
  # Same layout, but holding the positions of `x` instead of its values.
  positions <- matrix(seq_along(x), ncol = 3, byrow = TRUE)
  # A position that shows up a second time marks a recycled cell;
  # MARGIN = 0 checks cell by cell and keeps the matrix shape.
  recycled <- duplicated(positions, MARGIN = 0)
  out[recycled] <- ""
  out
}
# h2: index-pair variant.
#
# Pre-allocates a 3-column matrix filled with "" and writes every element of
# `x` into its (row, column) cell, so the unused cells of the last row keep
# the "" filler.
#
# x: a vector (the benchmark passes a character vector).
# Returns a character matrix with 3 columns and ceiling(length(x) / 3) rows.
h2 <- function(x) {
  # With empty input max(rr) would be -Inf and matrix() would fail, so return
  # the 0 x 3 result directly.
  if (length(x) == 0L) {
    return(matrix(character(0), nrow = 0L, ncol = 3L))
  }
  # Row index: elements 1-3 go to row 1, 4-6 to row 2, ...
  rr <- ceiling(seq_along(x) / 3)
  # Column index: running position of each element within its row.
  cc <- ave(rr, rr, FUN = seq_along)
  m <- matrix("", nrow = max(rr), ncol = 3)
  # A two-column index matrix addresses one (row, column) cell per element.
  m[cbind(rr, cc)] <- x
  m
}
# Small case: 13 letters drawn at random with replacement.
x <- sample(letters, 13, replace = TRUE)
library(microbenchmark)
# Time each of the four implementations 5 times on the same input.
microbenchmark(
a1(x),
a2(x),
h1(x),
h2(x),
times = 5)
# Unit: microseconds
# expr min lq mean median uq max neval cld
# a1(x) 135.708 140.270 171.5926 144.451 162.317 275.217 5 a
# a2(x) 270.276 270.655 277.2696 271.035 283.960 290.422 5 b
# h1(x) 191.968 217.436 246.4028 224.659 225.420 372.531 5 ab
# h2(x) 408.264 409.784 425.2176 411.685 412.445 483.910 5 c
# Large case: one million letters drawn at random with replacement.
x <- sample(letters, 1e6, replace = TRUE)
# Time each of the four implementations 5 times on the same input.
microbenchmark(
a1(x),
a2(x),
h1(x),
h2(x),
times = 5)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# a1(x) 2406.03363 2411.56990 2554.72548 2606.9757 2623.604 2725.4443 5 b
# a2(x) 4266.47556 4292.69452 4510.61242 4513.6833 4653.349 4826.8594 5 c
# h1(x) 76.51557 76.79041 91.24598 78.8207 101.336 122.7672 5 a
# h2(x) 2419.89711 2570.22968 2636.08654 2663.2898 2751.662 2775.3540 5 b
因此,对于小向量,a1 最快;对于较大的向量,h1 最快,比其他方法快 25 倍以上(中位数约 79 毫秒,而 a1 约 2607 毫秒)。