如何根据列表删除多个列值
How to remove multiple column values based on a list
我有一个数据集 df,如下所示:
如果我想删除 df 中出现在列表 RM 里的任何值,我该怎么办?
df 和 RM 可以使用代码构建:
# Example data: a 3-row tibble of character columns in which each cell holds
# a CSV file name or NA.
df <- structure(
  list(
    cy3.CSV = c("cy3.CSV", NA, NA),
    cy6.CSV = c("cy1.CSV", "cy24.CSV", "cy6.CSV"),
    dlt.CSV = c("dlt.CSV", NA, NA),
    dm.CSV = c("dm.CSV", NA, NA),
    dov.CSV = c("dov.CSV", "dov_1.CSV", NA),
    dov_1.CSV = c("dov_1.CSV", NA, NA),
    ds.CSV = c("ds.CSV", "ds_1.CSV", NA)
  ),
  row.names = c(NA, -3L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Values that should be blanked out wherever they occur in df.
RM <- c("chem.CSV", "dov_1.CSV", "eot_p.CSV", "dm.CSV", "vs.CSV")
结果应该是这样的
我该怎么办?请给我尽可能多的可能的解决方案。谢谢
这个有用吗:
# Build one anchored pattern from RM: escape regex metacharacters so the "."
# in each file name is matched literally, and wrap the alternation in ^( )$ so
# only cells equal to an RM entry match (not cells that merely contain one).
rm_escaped <- gsub("([.|()\\\\^{}+$*?]|\\[|\\])", "\\\\\\1", RM)
rm_pattern <- paste0("^(", paste(rm_escaped, collapse = "|"), ")$")
# gsub() with an NA replacement turns every matching element into NA.
data.frame(lapply(df, function(x) gsub(rm_pattern, NA, x)))
cy3.CSV cy6.CSV dlt.CSV dm.CSV dov.CSV dov_1.CSV ds.CSV
1 cy3.CSV cy1.CSV dlt.CSV <NA> dov.CSV <NA> ds.CSV
2 <NA> cy24.CSV <NA> <NA> <NA> <NA> ds_1.CSV
3 <NA> cy6.CSV <NA> <NA> <NA> <NA> <NA>
使用 purrr 和 stringr 包:
library(purrr)
library(stringr)
# str_escape() makes each RM entry a literal (so "." is not a wildcard), and
# the ^(?: )$ anchors restrict matches to cells equal to an RM entry rather
# than cells that merely contain one.
rm_pattern <- str_c("^(?:", str_c(str_escape(RM), collapse = "|"), ")$")
# An NA replacement sets every matching element to NA.
map_df(df, ~ str_replace_all(.x, rm_pattern, NA_character_))
# A tibble: 3 x 7
cy3.CSV cy6.CSV dlt.CSV dm.CSV dov.CSV dov_1.CSV ds.CSV
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 cy3.CSV cy1.CSV dlt.CSV NA dov.CSV NA ds.CSV
2 NA cy24.CSV NA NA NA NA ds_1.CSV
3 NA cy6.CSV NA NA NA NA NA
我们可以使用 across 和 mutate:
library(dplyr)
# For every column of df, set the cells whose value appears in RM to NA.
df %>%
  mutate(
    across(
      everything(),
      function(col) {
        col[col %in% RM] <- NA
        col
      }
    )
  )
输出:
# A tibble: 3 x 7
# cy3.CSV cy6.CSV dlt.CSV dm.CSV dov.CSV dov_1.CSV ds.CSV
# <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 cy3.CSV cy1.CSV dlt.CSV <NA> dov.CSV <NA> ds.CSV
#2 <NA> cy24.CSV <NA> <NA> <NA> <NA> ds_1.CSV
#3 <NA> cy6.CSV <NA> <NA> <NA> <NA> <NA>
我有一个数据集 df,如下所示:
如果我想删除 df 中出现在列表 RM 里的任何值,我该怎么办?
df 和 RM 可以使用代码构建:
# Example data: a 3-row tibble of character columns in which each cell holds
# a CSV file name or NA.
df <- structure(
  list(
    cy3.CSV = c("cy3.CSV", NA, NA),
    cy6.CSV = c("cy1.CSV", "cy24.CSV", "cy6.CSV"),
    dlt.CSV = c("dlt.CSV", NA, NA),
    dm.CSV = c("dm.CSV", NA, NA),
    dov.CSV = c("dov.CSV", "dov_1.CSV", NA),
    dov_1.CSV = c("dov_1.CSV", NA, NA),
    ds.CSV = c("ds.CSV", "ds_1.CSV", NA)
  ),
  row.names = c(NA, -3L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Values that should be blanked out wherever they occur in df.
RM <- c("chem.CSV", "dov_1.CSV", "eot_p.CSV", "dm.CSV", "vs.CSV")
结果应该是这样的
我该怎么办?请给我尽可能多的可能的解决方案。谢谢
这个有用吗:
# Build one anchored pattern from RM: escape regex metacharacters so the "."
# in each file name is matched literally, and wrap the alternation in ^( )$ so
# only cells equal to an RM entry match (not cells that merely contain one).
rm_escaped <- gsub("([.|()\\\\^{}+$*?]|\\[|\\])", "\\\\\\1", RM)
rm_pattern <- paste0("^(", paste(rm_escaped, collapse = "|"), ")$")
# gsub() with an NA replacement turns every matching element into NA.
data.frame(lapply(df, function(x) gsub(rm_pattern, NA, x)))
cy3.CSV cy6.CSV dlt.CSV dm.CSV dov.CSV dov_1.CSV ds.CSV
1 cy3.CSV cy1.CSV dlt.CSV <NA> dov.CSV <NA> ds.CSV
2 <NA> cy24.CSV <NA> <NA> <NA> <NA> ds_1.CSV
3 <NA> cy6.CSV <NA> <NA> <NA> <NA> <NA>
使用 purrr 和 stringr 包:
library(purrr)
library(stringr)
# str_escape() makes each RM entry a literal (so "." is not a wildcard), and
# the ^(?: )$ anchors restrict matches to cells equal to an RM entry rather
# than cells that merely contain one.
rm_pattern <- str_c("^(?:", str_c(str_escape(RM), collapse = "|"), ")$")
# An NA replacement sets every matching element to NA.
map_df(df, ~ str_replace_all(.x, rm_pattern, NA_character_))
# A tibble: 3 x 7
cy3.CSV cy6.CSV dlt.CSV dm.CSV dov.CSV dov_1.CSV ds.CSV
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 cy3.CSV cy1.CSV dlt.CSV NA dov.CSV NA ds.CSV
2 NA cy24.CSV NA NA NA NA ds_1.CSV
3 NA cy6.CSV NA NA NA NA NA
我们可以使用 across 和 mutate:
library(dplyr)
# For every column of df, set the cells whose value appears in RM to NA.
df %>%
  mutate(
    across(
      everything(),
      function(col) {
        col[col %in% RM] <- NA
        col
      }
    )
  )
输出:
# A tibble: 3 x 7
# cy3.CSV cy6.CSV dlt.CSV dm.CSV dov.CSV dov_1.CSV ds.CSV
# <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 cy3.CSV cy1.CSV dlt.CSV <NA> dov.CSV <NA> ds.CSV
#2 <NA> cy24.CSV <NA> <NA> <NA> <NA> ds_1.CSV
#3 <NA> cy6.CSV <NA> <NA> <NA> <NA> <NA>