R:选择列名与字符向量中任一字符串匹配的所有列
R select all columns matching vector of characters
# Example data: nine random numeric columns in three name families
# (abc_*, xyz_*, efg_*) followed by three constant columns.
temp.df <- data.frame(
  abc_1 = rnorm(10),
  abc_2 = rnorm(10),
  abc_3 = rnorm(10),
  xyz_1 = rnorm(10),
  xyz_2 = rnorm(10),
  xyz_3 = rnorm(10),
  efg_1 = rnorm(10),
  efg_2 = rnorm(10),
  efg_3 = rnorm(10),
  # Length-1 values below are recycled to all 10 rows.
  ID_1 = 12,
  ID_2 = 121,
  admin = "a"
)
如果我想选择列名中包含某个特定字符串(例如 abc)的列,我会这样做:
# Keep only the columns whose name contains "abc".
temp.df %>%
  dplyr::select(grep("abc", names(temp.df), value = TRUE))
但是,我真正想做的是:选择列名与某个向量中定义的任一字符串匹配的所有列
# Name fragments to look for in the column names.
col.names <- c("abc", "xyz", "efg")
即选择列名中包含 abc、xyz 或 efg 的所有列。
我们可以使用 matches(),先把各个字符串用 "|" 连接成一个正则表达式:
library(dplyr)
library(stringr)
# Join the fragments into one alternation regex ("abc|xyz|efg") and keep
# every column whose name matches it.
select(temp.df, matches(str_c(col.names, collapse = "|")))
# abc_1 abc_2 abc_3 xyz_1 xyz_2 xyz_3 efg_1 efg_2 efg_3
#1 0.1025701 0.038799612 0.2143873 0.5337834 1.59438881 -1.0237330 1.3720176 -0.8231706 2.4193997
#2 -0.5851774 0.216113597 0.1713179 0.1539639 -1.79143793 -1.0990128 -0.2140094 -0.7128861 -0.5546666
#3 1.4683254 -0.008827679 -0.3650205 0.8683394 -0.77935366 -1.2062593 0.2266538 0.4227441 -0.7425007
#4 -1.6439009 0.750848960 -0.3695533 0.8386111 -0.49404561 1.3429484 -0.5230127 0.4106772 -0.3306021
#5 -0.9025557 1.676054692 -0.2629903 -0.4582141 -1.86148990 -0.7110888 -0.7299364 -0.1007782 -1.0758988
#6 -0.1630221 0.089035672 0.7533968 -1.1604347 -0.02611652 1.8260824 -0.4772013 -0.3414501 2.0913372
#7 0.8977818 -1.558979020 1.5228160 0.7184697 -0.31214593 1.1601056 1.7896162 -1.7205771 -1.8526482
#8 0.5783191 -1.368062940 -0.9343600 1.0642994 0.99022968 -0.3304615 -0.8067144 0.1506883 0.8354455
#9 -0.7315967 -1.476859740 -0.9646872 0.1968119 0.12040511 -1.0855037 1.4778904 1.0592415 2.0147071
#10 0.2532087 0.063369835 0.3045794 -0.9092148 0.50390732 0.6157227 1.0895302 -1.2201279 -0.4196739
您也可以直接把 col.names 传给 matches()(向量长度大于 1 时,取各个模式匹配结果的并集):
library(dplyr)
# Pass the whole pattern vector to matches(); columns matching any of the
# patterns are kept.
select(temp.df, matches(col.names))
# abc_1 abc_2 abc_3 xyz_1 xyz_2 xyz_3 efg_1 efg_2 efg_3
#1 -0.3262 -0.264 -1.0077 0.3889 0.2485 0.8088 0.499 0.0467 -0.412
#2 0.1485 -0.885 1.2058 0.5115 0.1815 1.7407 -0.474 0.9606 -0.172
#3 -0.3799 -0.500 -0.4928 -0.6404 0.5147 -0.6443 2.594 -0.4211 -0.76
#....
或者在 base R 中使用 grepl():
# Base R: build the alternation regex "abc|xyz|efg" and keep the columns
# whose names match it.
temp.df[grep(paste(col.names, collapse = "|"), names(temp.df))]
# Example data: nine random numeric columns in three name families
# (abc_*, xyz_*, efg_*) followed by three constant columns.
temp.df <- data.frame(
  abc_1 = rnorm(10),
  abc_2 = rnorm(10),
  abc_3 = rnorm(10),
  xyz_1 = rnorm(10),
  xyz_2 = rnorm(10),
  xyz_3 = rnorm(10),
  efg_1 = rnorm(10),
  efg_2 = rnorm(10),
  efg_3 = rnorm(10),
  # Length-1 values below are recycled to all 10 rows.
  ID_1 = 12,
  ID_2 = 121,
  admin = "a"
)
如果我想选择列名中包含某个特定字符串(例如 abc)的列,我会这样做:
# Keep only the columns whose name contains "abc".
temp.df %>%
  dplyr::select(grep("abc", names(temp.df), value = TRUE))
但是,我真正想做的是:选择列名与某个向量中定义的任一字符串匹配的所有列
# Name fragments to look for in the column names.
col.names <- c("abc", "xyz", "efg")
即选择列名中包含 abc、xyz 或 efg 的所有列。
我们可以使用 matches(),先把各个字符串用 "|" 连接成一个正则表达式:
library(dplyr)
library(stringr)
# Join the fragments into one alternation regex ("abc|xyz|efg") and keep
# every column whose name matches it.
select(temp.df, matches(str_c(col.names, collapse = "|")))
# abc_1 abc_2 abc_3 xyz_1 xyz_2 xyz_3 efg_1 efg_2 efg_3
#1 0.1025701 0.038799612 0.2143873 0.5337834 1.59438881 -1.0237330 1.3720176 -0.8231706 2.4193997
#2 -0.5851774 0.216113597 0.1713179 0.1539639 -1.79143793 -1.0990128 -0.2140094 -0.7128861 -0.5546666
#3 1.4683254 -0.008827679 -0.3650205 0.8683394 -0.77935366 -1.2062593 0.2266538 0.4227441 -0.7425007
#4 -1.6439009 0.750848960 -0.3695533 0.8386111 -0.49404561 1.3429484 -0.5230127 0.4106772 -0.3306021
#5 -0.9025557 1.676054692 -0.2629903 -0.4582141 -1.86148990 -0.7110888 -0.7299364 -0.1007782 -1.0758988
#6 -0.1630221 0.089035672 0.7533968 -1.1604347 -0.02611652 1.8260824 -0.4772013 -0.3414501 2.0913372
#7 0.8977818 -1.558979020 1.5228160 0.7184697 -0.31214593 1.1601056 1.7896162 -1.7205771 -1.8526482
#8 0.5783191 -1.368062940 -0.9343600 1.0642994 0.99022968 -0.3304615 -0.8067144 0.1506883 0.8354455
#9 -0.7315967 -1.476859740 -0.9646872 0.1968119 0.12040511 -1.0855037 1.4778904 1.0592415 2.0147071
#10 0.2532087 0.063369835 0.3045794 -0.9092148 0.50390732 0.6157227 1.0895302 -1.2201279 -0.4196739
您也可以直接把 col.names 传给 matches()(向量长度大于 1 时,取各个模式匹配结果的并集):
library(dplyr)
# Pass the whole pattern vector to matches(); columns matching any of the
# patterns are kept.
select(temp.df, matches(col.names))
# abc_1 abc_2 abc_3 xyz_1 xyz_2 xyz_3 efg_1 efg_2 efg_3
#1 -0.3262 -0.264 -1.0077 0.3889 0.2485 0.8088 0.499 0.0467 -0.412
#2 0.1485 -0.885 1.2058 0.5115 0.1815 1.7407 -0.474 0.9606 -0.172
#3 -0.3799 -0.500 -0.4928 -0.6404 0.5147 -0.6443 2.594 -0.4211 -0.76
#....
或者在 base R 中使用 grepl():
# Base R: build the alternation regex "abc|xyz|efg" and keep the columns
# whose names match it.
temp.df[grep(paste(col.names, collapse = "|"), names(temp.df))]