替换嵌套的 tibble / dataframe 中的 NULL 值

Replace NULL value in nested tibble / dataframe

我有一个 tibble:

# Example data: `a` is a 3-row tibble; `df` stores it in two list-columns,
# with a NULL cell in the first row of a2.
# Both calls are namespace-qualified so the snippet runs without attaching
# the tibble package.
a <- tibble::tibble(a = 1:3)
df <- tibble::tibble(a1 = list(a, a), a2 = list(NULL, a))

# A tibble: 2 x 2
  a1               a2              
  <list>           <list>          
1 <tibble [3 x 1]> <NULL>          
2 <tibble [3 x 1]> <tibble [3 x 1]>

我想用另一个 tibble(比如 a)替换 <NULL> 值。

但是,

# Attempt: pass the tibble `a` directly as the replacement for column a2.
# The output printed below shows the cell ending up as <int [3]>, not a tibble.
# NOTE(review): presumably the tibble is treated as a list of its columns
# during assignment, so the cell receives `a$a` -- confirm against the
# installed tidyr version.
df %>% tidyr::replace_na(list(a2 = a))

得到的却是:

# A tibble: 2 x 2
  a1               a2              
  <list>           <list>          
1 <tibble [3 x 1]> <int [3]>       
2 <tibble [3 x 1]> <tibble [3 x 1]>

而我想要的是:

# A tibble: 2 x 2
  a1               a2              
  <list>           <list>          
1 <tibble [3 x 1]> <tibble [3 x 1]>   
2 <tibble [3 x 1]> <tibble [3 x 1]>

有简单的解决方法吗?

# Example where the missing cell is NA (not NULL).
a <- tibble::tibble(a = seq_len(3))
df <- tibble::tibble(a1 = rep(list(a), 2), a2 = list(NA, a))

# is.na(df) yields a logical mask over the cells; every flagged cell is
# overwritten with the tibble `a` (wrapped in a list so it fills one cell).
df <- replace(df, is.na(df), list(a))
df
# # A tibble: 2 × 2
#   a1               a2              
#   <list>           <list>          
# 1 <tibble [3 × 1]> <tibble [3 × 1]>
# 2 <tibble [3 × 1]> <tibble [3 × 1]>

更新(因为 OP 修改了问题):

library(dplyr)

a <- tibble(a = 1:3)
df <- tibble(a1 = list(a, a), a2 = list(NULL, a))

# Replace the NULL cells of one list-column with the tibble `a`.
# NULL is detected with is.null() rather than by comparing against the
# string "NULL", so a cell that merely contains the text "NULL" is left alone.
# Defined at top level so that `a` refers to the tibble above.
fill_null_cells <- function(col) {
  col[vapply(col, is.null, logical(1))] <- list(a)
  col
}

# Apply the helper to every list-column; other columns are untouched.
df %>%
  mutate(across(where(is.list), fill_null_cells))
# # A tibble: 2 × 2
#   a1               a2              
#   <list>           <list>          
# 1 <tibble [3 × 1]> <tibble [3 × 1]>
# 2 <tibble [3 × 1]> <tibble [3 × 1]>

另一个可能的解决方案:

library(tidyverse)

# Same example data as in the question.
a <- tibble(a = 1:3)
df <- tibble(a1 = list(a, a), a2 = list(NULL, a))

# The replacement is wrapped in list() so that the whole tibble `a` is used
# as a single cell value for column a2 (see the printed result below).
replace_na(df, list(a2 = list(a)))

#> # A tibble: 2 × 2
#>   a1               a2              
#>   <list>           <list>          
#> 1 <tibble [3 × 1]> <tibble [3 × 1]>
#> 2 <tibble [3 × 1]> <tibble [3 × 1]>

另一个(通用)解决方案,假设嵌套级别是预先知道的:

library(purrr)

# `df` is a list of columns (depth 1) whose elements are the cells (depth 2),
# so the function is applied at depth 2: NULL cells become `a`, all other
# cells are returned unchanged. The result is converted back to a tibble.
df %>%
  map_depth(
    .depth = 2,
    .f = function(cell) if (is.null(cell)) a else cell
  ) %>%
  as_tibble()

#> # A tibble: 2 × 2
#>   a1               a2              
#>   <list>           <list>          
#> 1 <tibble [3 × 1]> <tibble [3 × 1]>
#> 2 <tibble [3 × 1]> <tibble [3 × 1]>

对于任意层级的嵌套,也可以使用 rrapply 包中的 rrapply():

library(rrapply)

# Visit the elements of `df` recursively; wherever the condition (is.null)
# holds, substitute the tibble `a`.
rrapply(
  df,
  f = function(leaf) a,
  condition = is.null
)

#> # A tibble: 2 × 2
#>   a1               a2              
#>   <list>           <list>          
#> 1 <tibble [3 × 1]> <tibble [3 × 1]>
#> 2 <tibble [3 × 1]> <tibble [3 × 1]>