用列的中位数替换R中Dataframe中的NULL值

Replacing NULL value in Dataframe in R with Median of Column

我有一个包含多个 NULL 值的数据框。这些列的类型是 list 而不是 numeric。是否可以用所在列的中位数替换所有 NULL 值?我尝试过一种手动方法:先把某一列的 NULL 值改为 0,并用 as.numeric() 转换该列,然后应用 median() 函数。有没有更有效的方法来做到这一点?

# Flag the NULL entries of the `Start Working` list-column. vapply() is used
# instead of sapply() so the mask is always a logical vector, even when the
# column has zero rows (sapply() would return an empty list there, which is
# not a valid subscript).
i1 <- vapply(pivot_table_1$`Start Working`, is.null, logical(1))
# Overwrite the flagged entries with the placeholder value 0.
pivot_table_1$`Start Working`[i1] <- 0

来自 dput() 的输出:

# dput() of the example data: a 31-row tibble with an integer `Day` column
# (1:31) and six list-columns whose entries are single numbers or NULL.
# NULL entries occur in `Stop Working`, `Breakfast` and `Dinner`.
# `Start Working` holds 0 at the same positions where `Stop Working` is NULL
# (rows 5, 6, 12, 13, 19, 20, 26, 27) -- presumably the result of the manual
# NULL -> 0 replacement shown in the question; confirm with the author.
structure(list(Day = 1:31, `Start Sleeping` = list(0, 20, 35, 
    40, 50, 0, 40, 0, 0, 40, 50, 0, 0, 40, 0, 40, 35, 45, 0, 
    0, 65, 35, 40, 40, 0, 50, 40, 0, 0, 0, 0), `Stop Sleeping` = list(
    440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 
    440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 
    440, 440, 440, 440, 440, 440, 440), `Start Working` = list(
    490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0, 0, 
    490, 490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0, 
    0, 490, 490, 490, 490), `Stop Working` = list(1005, 1005, 
    1005, 1005, NULL, NULL, 965, 965, 965, 965, 965, NULL, NULL, 
    965, 965, 965, 965, 965, NULL, NULL, 965, 965, 965, 965, 
    965, NULL, NULL, 965, 965, 965, 965), Breakfast = list(690, 
    645, 615, 540, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    475, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 475, NULL, 
    NULL, NULL, NULL, NULL, 475, NULL, NULL, NULL, NULL, NULL), 
    Dinner = list(1390, 1360, 1285, 1270, 1390, NULL, 1140, 1140, 
        1130, 1135, 1130, NULL, 1165, 1140, 1130, 1135, 1130, 
        1140, 1140, 1180, NULL, 1145, 1135, 1140, 1135, 1160, 
        1140, 1140, NULL, 1140, NULL)), row.names = c(NA, -31L
), class = c("tbl_df", "tbl", "data.frame"))

如果您希望将条目保留为 length-one 列表,您可以这样做:

# Replace every empty (NULL) entry of each list-column with that column's
# median, keeping the columns as lists of length-one vectors.
pivot_table_1[] <- lapply(pivot_table_1, function(x) {
  # Leave ordinary (atomic) columns untouched; running them through ifelse()
  # would strip attributes such as factor levels or Date classes.
  if (!is.list(x)) {
    return(x)
  }
  # unlist() drops NULL entries, so the median is taken over the values
  # that are actually present.
  x[lengths(x) == 0L] <- list(median(unlist(x)))
  x
})

pivot_table_1
#> # A tibble: 31 x 7
#>      Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#>    <int> <list>           <list>          <list>          <list>        
#>  1     1 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  2     2 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  3     3 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  4     4 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  5     5 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  6     6 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  7     7 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  8     8 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#>  9     9 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#> 10    10 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>     
#> # ... with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>

或者,如果您希望它们作为数字列,请执行以下操作:

# Replace every empty (NULL) entry of each list-column with that column's
# median, then flatten the list-column into a plain atomic vector.
pivot_table_1[] <- lapply(pivot_table_1, function(x) {
  # Leave ordinary (atomic) columns untouched; running them through ifelse()
  # would strip attributes such as factor levels or Date classes.
  if (!is.list(x)) {
    return(x)
  }
  # unlist() drops NULL entries, so the median is taken over the values
  # that are actually present.
  x[lengths(x) == 0L] <- list(median(unlist(x)))
  # Every entry now has a value, so unlist() yields one element per row.
  unlist(x)
})

pivot_table_1
#> # A tibble: 31 x 7
#>      Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#>    <int>            <dbl>           <dbl>           <dbl>          <dbl>
#>  1     1                0             440             490           1005
#>  2     2               20             440             490           1005
#>  3     3               35             440             490           1005
#>  4     4               40             440             490           1005
#>  5     5               50             440               0            965
#>  6     6                0             440               0            965
#>  7     7               40             440             490            965
#>  8     8                0             440             490            965
#>  9     9                0             440             490            965
#> 10    10               40             440             490            965
#> # ... with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>

由 reprex package (v2.0.1) 于 2022-05-22 创建
tidyr 中的 replace_na() 可用于替换列表中的 NULL(在 list-column 中,NULL 相当于 NA)。下面代码中的 df 即问题中的 pivot_table_1。

library(tidyverse)

# For each list-column, build a length-one list holding the column median
# (unlist() drops the NULL entries first), and pass the resulting named list
# to replace_na() as the per-column replacements.
replace_na(
  df,
  map(keep(df, is.list), function(col) list(median(unlist(col))))
)

# # A tibble: 31 × 7
#      Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner   
#    <int> <list>           <list>          <list>          <list>         <list>    <list>   
#  1     1 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  2     2 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  3     3 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  4     4 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  5     5 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  6     6 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  7     7 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  8     8 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
#  9     9 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
# 10    10 <dbl [1]>        <dbl [1]>       <dbl [1]>       <dbl [1]>      <dbl [1]> <dbl [1]>
# # … with 21 more rows

如果你想让这些 list-column 变平,试试 unnest():

# Fill the NULL entries of every list-column with that column's median,
# then unnest the list-columns.
df %>%
  replace_na(
    map(keep(df, is.list), function(col) list(median(unlist(col))))
  ) %>%
  unnest(where(is.list))

# # A tibble: 31 × 7
#      Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
#    <int>            <dbl>           <dbl>           <dbl>          <dbl>     <dbl>  <dbl>
#  1     1                0             440             490           1005       690   1390
#  2     2               20             440             490           1005       645   1360
#  3     3               35             440             490           1005       615   1285
#  4     4               40             440             490           1005       540   1270
#  5     5               50             440               0            965       540   1390
#  6     6                0             440               0            965       540   1140
#  7     7               40             440             490            965       540   1140
#  8     8                0             440             490            965       540   1140
#  9     9                0             440             490            965       540   1130
# 10    10               40             440             490            965       540   1135