替换 R 中不同数据帧的值

Substitute values across different data frames in R

我有以下代码生成的 4 个数据帧。每个数据帧的最后一列只有 2 种取值:要么是零 ("0"),要么是一个 id;这个 id 在同一个 df 内部是相同的,但在不同的 df 之间是不同的。

如何将每个 df 的 id 列中的所有零替换为该 df 自己的 ID?

例如,将 df1 更改为:

     year    counts    id
 1   2015    0         0
 2   2016    0         0
 3   2017    7         Fg4s5
 4   2018    8         Fg4s5
 5   2019    5         0
 6   2020    12        Fg4s5

至:

     year    counts    id
 1   2015    0         Fg4s5
 2   2016    0         Fg4s5
 3   2017    7         Fg4s5
 4   2018    8         Fg4s5
 5   2019    5         Fg4s5
 6   2020    12        Fg4s5

其他 df 也做同样的处理(各自使用自己的 id)。

数据帧代码:

 # Example data: four data frames whose `id` column holds either the
 # placeholder "0" or a single identifier that is specific to that data frame.
 # The placeholder is written as the string "0" because mixing it with the
 # identifier strings inside c() makes the whole column character anyway.
 df1 <- data.frame(
   year = 2015:2020,
   counts = c(0, 0, 7, 8, 5, 12),
   id = c("0", "0", "Fg4s5", "Fg4s5", "0", "Fg4s5")
 )
 df2 <- data.frame(
   year = 2014:2020,
   counts = c(1, 5, 9, 2, 2, 19, 3),
   id = c(rep("0", 5), rep("Qd8a2", 2))
 )
 df3 <- data.frame(
   year = 2016:2020,
   counts = c(0, 0, 0, 0, 6),
   id = c(rep("0", 2), rep("Wk9l4", 3))
 )
 df4 <- data.frame(
   year = 2014:2020,
   counts = c(0, 0, 8, 1, 9, 12, 23),
   id = c("0", "Rd7q0", "0", "0", rep("Rd7q0", 3))
 )

将数据帧放入列表中并使用 lapply 更改 id 列中的值:

# Collect the data frames so the same fix can be applied to each of them.
list_df <- list(df1, df2, df3, df4)

# For every data frame, overwrite the "0" placeholders in `id` with the first
# real identifier found in that same column.
list_df <- lapply(list_df, function(x) {
  is_placeholder <- !is.na(x$id) & x$id == "0"
  real_id <- x$id[!is.na(x$id) & !is_placeholder]

  # No real identifier in this data frame: return it unchanged rather than
  # turning every placeholder into NA.
  if (length(real_id) == 0L) {
    return(x)
  }

  x$id[is_placeholder] <- real_id[1]
  x
})

list_df

#[[1]]
#  year counts    id
#1 2015      0 Fg4s5
#2 2016      0 Fg4s5
#3 2017      7 Fg4s5
#4 2018      8 Fg4s5
#5 2019      5 Fg4s5
#6 2020     12 Fg4s5

#[[2]]
#  year counts    id
#1 2014      1 Qd8a2
#2 2015      5 Qd8a2
#3 2016      9 Qd8a2
#4 2017      2 Qd8a2
#5 2018      2 Qd8a2
#6 2019     19 Qd8a2
#7 2020      3 Qd8a2

#[[3]]
#  year counts    id
#1 2016      0 Wk9l4
#2 2017      0 Wk9l4
#3 2018      0 Wk9l4
#4 2019      0 Wk9l4
#5 2020      6 Wk9l4

#[[4]]
#  year counts    id
#1 2014      0 Rd7q0
#2 2015      0 Rd7q0
#3 2016      8 Rd7q0
#4 2017      1 Rd7q0
#5 2018      9 Rd7q0
#6 2019     12 Rd7q0
#7 2020     23 Rd7q0

如果想把结果重新放回各自独立的数据帧(df1 到 df4)中:

# Name the list elements df1..df4, then copy each element into the global
# environment under its name, overwriting the original data frames.
list_df <- setNames(list_df, paste0("df", 1:4))
list2env(list_df, envir = globalenv())

另一种做法:使用 purrr::map 配合 dplyr 的 mutate 和 first(需要先加载 purrr 和 dplyr):

# For each data frame, set the whole `id` column to the first value in it
# that is not the "0" placeholder.
map(
  list(df1, df2, df3, df4),
  function(df) mutate(df, id = first(id[id != "0"]))
)

[[1]]
  year counts    id
1 2015      0 Fg4s5
2 2016      0 Fg4s5
3 2017      7 Fg4s5
4 2018      8 Fg4s5
5 2019      5 Fg4s5
6 2020     12 Fg4s5

[[2]]
  year counts    id
1 2014      1 Qd8a2
2 2015      5 Qd8a2
3 2016      9 Qd8a2
4 2017      2 Qd8a2
5 2018      2 Qd8a2
6 2019     19 Qd8a2
7 2020      3 Qd8a2

[[3]]
  year counts    id
1 2016      0 Wk9l4
2 2017      0 Wk9l4
3 2018      0 Wk9l4
4 2019      0 Wk9l4
5 2020      6 Wk9l4

[[4]]
  year counts    id
1 2014      0 Rd7q0
2 2015      0 Rd7q0
3 2016      8 Rd7q0
4 2017      1 Rd7q0
5 2018      9 Rd7q0
6 2019     12 Rd7q0
7 2020     23 Rd7q0