将 data.frame 的 colnames 增加 1
increment colnames of a data.frame by 1
有一个 data.frame 和 colnames
nam <- c("a", paste0("a_", seq(12)))
"a" "a_1" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12"
我怎样才能将带数字的名字的数字加 1?
预期的结果是
"a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
到目前为止我的解决方案看起来很复杂……有没有比下面这个更简单的方法?
#' Increment the numeric suffix of each name by one.
#'
#' For every element of `nam` that contains a digit, the text from the first
#' digit to the end of the string is converted to a number, incremented, and
#' appended to the unchanged prefix. Elements without digits are returned as
#' they are.
#'
#' @param nam Character vector of names, e.g. `c("a", "a_1", "a_2")`.
#' @return Character vector of the same length as `nam`.
increment_names <- function(nam) {
  # The backslash must be doubled: "\d" is not a valid escape in an R string.
  where <- regexpr("\\d", nam)
  ind <- which(where > 0)
  prefix <- substring(nam[ind], 1, where[ind] - 1)
  increment <- as.numeric(substring(nam[ind], where[ind])) + 1
  # scientific = FALSE keeps e.g. 100000 from being rendered as "1e+05".
  suffix <- vapply(increment, format, character(1),
                   scientific = FALSE, digits = 15)
  # Rebuild with paste0() rather than `substring<-`: the replacement form never
  # lengthens the string, so "a_9" would become "a_1" instead of "a_10".
  nam[ind] <- paste0(prefix, suffix)
  nam
}
> increment_names(nam)
[1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
只要你的模式是 "nonnumbers_numbers":
# Keep only the digits of each name; a name without digits yields "" and
# therefore NA after conversion.
nums <- as.numeric(gsub("[^0-9]", "", nam))
# Rebuild just the names that carried a number: non-digit part + (number + 1).
has_num <- !is.na(nums)
nam[has_num] <- paste0(gsub("[0-9]", "", nam[has_num]), nums[has_num] + 1)
输出:
[1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12"
[13] "a_13"
使用 gsubfn 包,你可以很简单地做到:
library(gsubfn)
# Replace each run of digits with its numeric value plus one. The backslash is
# doubled because "\d" is not a valid escape inside an R string literal.
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
这适用于任何模式,你无需假定上述 "nonnumbers_numbers" 模式,例如:
(nam <- c("a", paste0(seq(12), "_a")))
## [1] "a" "1_a" "2_a" "3_a" "4_a" "5_a" "6_a" "7_a" "8_a" "9_a" "10_a" "11_a" "12_a"
# Same call on names that start with the number: each run of digits is
# replaced by its value plus one. "\\d" is the escaped form R requires.
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a" "2_a" "3_a" "4_a" "5_a" "6_a" "7_a" "8_a" "9_a" "10_a" "11_a" "12_a" "13_a"
基础 R 的 regmatches 解法:
# Base R: locate the first run of digits in each name and overwrite it with
# its value plus one. regexpr() reports only the first match per element, and
# elements without a match are left unchanged by `regmatches<-`.
# The backslash is doubled because "\d" is not a valid R string escape.
r <- regexpr("\\d+", nam)
regmatches(nam, r) <- as.numeric(regmatches(nam, r)) + 1
nam
# [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" ...
你可以试试 "ore" 包,你的替换可以是函数,像这样:
nam <- c("a", paste0("a_", seq(12)))
nam
library(ore)
# Substitute every (optionally negative) integer with its value plus one;
# all = TRUE asks for every match, not just the first. The backslash is
# doubled because "\d" is not a valid escape inside an R string literal.
ore.subst("-?\\d+", function(x) as.numeric(x) + 1, nam, all = TRUE)
# [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9"
# [10] "a_10" "a_11" "a_12" "a_13"
这与 "gsubfn" 包的功能相似,但(至少在这种情况下)效率更高。以下是一些基准:
library(stringi)
set.seed(1)
nam <- stri_rand_strings(10000, 5, pattern = "[A-J0-9]")
#' Benchmark candidate: increment numbers with ore::ore.subst().
#'
#' @param invec Character vector to process; defaults to the global `nam`.
#' @return The result of `ore.subst()`, with every (optionally negative)
#'   integer match replaced by its value plus one.
f_ORE <- function(invec = nam) {
  # "\\d" (doubled backslash) is required; "\d" does not parse in R.
  ore.subst("-?\\d+", function(x) as.numeric(x) + 1, invec, all = TRUE)
}
#' Benchmark candidate: increment numbers with gsubfn::gsubfn().
#'
#' @param invec Character vector to process; defaults to the global `nam`.
#' @return The result of `gsubfn()`, with each run of digits replaced by its
#'   value plus one.
f_GSUBFN <- function(invec = nam) {
  # "\\d" (doubled backslash) is required; "\d" does not parse in R.
  gsubfn("\\d+", function(x) as.numeric(x) + 1, invec)
}
#' Benchmark candidate: increment numbers with base R regmatches().
#'
#' Only the FIRST run of digits in each element is incremented, because
#' regexpr() reports a single match per element. Elements without digits are
#' returned unchanged.
#'
#' @param invec Character vector to process; defaults to the global `nam`.
#' @return Character vector of the same length as `invec`.
f_BASE <- function(invec = nam) {
  # "\\d" (doubled backslash) is required; "\d" does not parse in R.
  first_num <- regexpr("\\d+", invec)
  regmatches(invec, first_num) <-
    as.numeric(regmatches(invec, first_num)) + 1
  invec
}
system.time(f_GSUBFN())
# user system elapsed
# 5.48 0.01 5.50
library(microbenchmark)
microbenchmark(f_BASE(), f_ORE())
# Unit: milliseconds
# expr min lq mean median uq max neval
# f_BASE() 141.79743 149.58914 161.49041 152.81038 162.10550 357.6483 100
# f_ORE() 57.35309 59.58433 65.84678 60.92218 68.40062 116.7714 100
请注意,虽然 "ore" 方法和 "gsubfn" 方法的结果完全相同,但它们与基础 R 方法的结果略有不同:基础 R 方法使用 regexpr,只替换每个字符串中的第一处数字。
考虑:
> identical(f_ORE(), f_GSUBFN())
[1] TRUE
## Edge case...
> nam[988]
[1] "0G019"
> f_ORE()[988] ## 019 becomes 20 (without the leading zero)
[1] "1G20"
> f_GSUBFN()[988] ## Same
[1] "1G20"
> f_BASE()[988] ## This seems off...
[1] "1G019"
有一个 data.frame 和 colnames
nam <- c("a", paste0("a_", seq(12)))
"a" "a_1" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12"
我怎样才能将带数字的名字的数字加 1?
预期的结果是
"a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
到目前为止我的解决方案看起来很复杂...有没有比
更简单的方法?
#' Increment the numeric suffix of each name by one.
#'
#' For every element of `nam` that contains a digit, the text from the first
#' digit to the end of the string is converted to a number, incremented, and
#' appended to the unchanged prefix. Elements without digits are returned as
#' they are.
#'
#' @param nam Character vector of names, e.g. `c("a", "a_1", "a_2")`.
#' @return Character vector of the same length as `nam`.
increment_names <- function(nam) {
  # The backslash must be doubled: "\d" is not a valid escape in an R string.
  where <- regexpr("\\d", nam)
  ind <- which(where > 0)
  prefix <- substring(nam[ind], 1, where[ind] - 1)
  increment <- as.numeric(substring(nam[ind], where[ind])) + 1
  # scientific = FALSE keeps e.g. 100000 from being rendered as "1e+05".
  suffix <- vapply(increment, format, character(1),
                   scientific = FALSE, digits = 15)
  # Rebuild with paste0() rather than `substring<-`: the replacement form never
  # lengthens the string, so "a_9" would become "a_1" instead of "a_10".
  nam[ind] <- paste0(prefix, suffix)
  nam
}
> increment_names(nam)
[1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
只要你的模式是 "nonnumbers_numbers":
# Keep only the digits of each name; a name without digits yields "" and
# therefore NA after conversion.
nums <- as.numeric(gsub("[^0-9]", "", nam))
# Rebuild just the names that carried a number: non-digit part + (number + 1).
has_num <- !is.na(nums)
nam[has_num] <- paste0(gsub("[0-9]", "", nam[has_num]), nums[has_num] + 1)
输出:
[1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12"
[13] "a_13"
使用 gsubfn 包,你可以很简单地做到:
library(gsubfn)
# Replace each run of digits with its numeric value plus one. The backslash is
# doubled because "\d" is not a valid escape inside an R string literal.
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
这适用于任何模式,你无需假定上述 "nonnumbers_numbers" 模式,例如:
(nam <- c("a", paste0(seq(12), "_a")))
## [1] "a" "1_a" "2_a" "3_a" "4_a" "5_a" "6_a" "7_a" "8_a" "9_a" "10_a" "11_a" "12_a"
# Same call on names that start with the number: each run of digits is
# replaced by its value plus one. "\\d" is the escaped form R requires.
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a" "2_a" "3_a" "4_a" "5_a" "6_a" "7_a" "8_a" "9_a" "10_a" "11_a" "12_a" "13_a"
基础 R 的 regmatches 解法:
# Base R: locate the first run of digits in each name and overwrite it with
# its value plus one. regexpr() reports only the first match per element, and
# elements without a match are left unchanged by `regmatches<-`.
# The backslash is doubled because "\d" is not a valid R string escape.
r <- regexpr("\\d+", nam)
regmatches(nam, r) <- as.numeric(regmatches(nam, r)) + 1
nam
# [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" ...
你可以试试 "ore" 包,你的替换可以是函数,像这样:
nam <- c("a", paste0("a_", seq(12)))
nam
library(ore)
# Substitute every (optionally negative) integer with its value plus one;
# all = TRUE asks for every match, not just the first. The backslash is
# doubled because "\d" is not a valid escape inside an R string literal.
ore.subst("-?\\d+", function(x) as.numeric(x) + 1, nam, all = TRUE)
# [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9"
# [10] "a_10" "a_11" "a_12" "a_13"
这与 "gsubfn" 包的功能相似,但(至少在这种情况下)效率更高。以下是一些基准:
library(stringi)
set.seed(1)
nam <- stri_rand_strings(10000, 5, pattern = "[A-J0-9]")
#' Benchmark candidate: increment numbers with ore::ore.subst().
#'
#' @param invec Character vector to process; defaults to the global `nam`.
#' @return The result of `ore.subst()`, with every (optionally negative)
#'   integer match replaced by its value plus one.
f_ORE <- function(invec = nam) {
  # "\\d" (doubled backslash) is required; "\d" does not parse in R.
  ore.subst("-?\\d+", function(x) as.numeric(x) + 1, invec, all = TRUE)
}
#' Benchmark candidate: increment numbers with gsubfn::gsubfn().
#'
#' @param invec Character vector to process; defaults to the global `nam`.
#' @return The result of `gsubfn()`, with each run of digits replaced by its
#'   value plus one.
f_GSUBFN <- function(invec = nam) {
  # "\\d" (doubled backslash) is required; "\d" does not parse in R.
  gsubfn("\\d+", function(x) as.numeric(x) + 1, invec)
}
#' Benchmark candidate: increment numbers with base R regmatches().
#'
#' Only the FIRST run of digits in each element is incremented, because
#' regexpr() reports a single match per element. Elements without digits are
#' returned unchanged.
#'
#' @param invec Character vector to process; defaults to the global `nam`.
#' @return Character vector of the same length as `invec`.
f_BASE <- function(invec = nam) {
  # "\\d" (doubled backslash) is required; "\d" does not parse in R.
  first_num <- regexpr("\\d+", invec)
  regmatches(invec, first_num) <-
    as.numeric(regmatches(invec, first_num)) + 1
  invec
}
system.time(f_GSUBFN())
# user system elapsed
# 5.48 0.01 5.50
library(microbenchmark)
microbenchmark(f_BASE(), f_ORE())
# Unit: milliseconds
# expr min lq mean median uq max neval
# f_BASE() 141.79743 149.58914 161.49041 152.81038 162.10550 357.6483 100
# f_ORE() 57.35309 59.58433 65.84678 60.92218 68.40062 116.7714 100
请注意,虽然 "ore" 方法和 "gsubfn" 方法的结果完全相同,但它们与基础 R 方法的结果略有不同:基础 R 方法使用 regexpr,只替换每个字符串中的第一处数字。
考虑:
> identical(f_ORE(), f_GSUBFN())
[1] TRUE
## Edge case...
> nam[988]
[1] "0G019"
> f_ORE()[988] ## 019 becomes 20 (without the leading zero)
[1] "1G20"
> f_GSUBFN()[988] ## Same
[1] "1G20"
> f_BASE()[988] ## This seems off...
[1] "1G019"