用列的中位数替换R中Dataframe中的NULL值
Replacing NULL value in Dataframe in R with Median of Column
我有一个包含多个 NULL 值的数据框,其中各列的 class 类型是 LIST 而不是 NUMERIC。是否可以用列的中位数替换所有 NULL 值?我尝试了一种手动方法,即使用 as.numeric() 函数将其中一列的 NULL 值更改为 0,然后应用 median() 函数。有没有更有效的方法来做到这一点?
# Manual first attempt: flag the NULL entries of the `Start Working`
# list-column and overwrite them with 0.
# vapply() is used instead of sapply() so the mask is always a logical vector;
# on an empty column sapply() returns list(), which is not a valid subscript.
i1 <- vapply(pivot_table_1$`Start Working`, is.null, logical(1))
pivot_table_1$`Start Working`[i1] <- 0
dput() 的输出:
structure(list(Day = 1:31, `Start Sleeping` = list(0, 20, 35,
40, 50, 0, 40, 0, 0, 40, 50, 0, 0, 40, 0, 40, 35, 45, 0,
0, 65, 35, 40, 40, 0, 50, 40, 0, 0, 0, 0), `Stop Sleeping` = list(
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440), `Start Working` = list(
490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0, 0,
490, 490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0,
0, 490, 490, 490, 490), `Stop Working` = list(1005, 1005,
1005, 1005, NULL, NULL, 965, 965, 965, 965, 965, NULL, NULL,
965, 965, 965, 965, 965, NULL, NULL, 965, 965, 965, 965,
965, NULL, NULL, 965, 965, 965, 965), Breakfast = list(690,
645, 615, 540, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
475, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 475, NULL,
NULL, NULL, NULL, NULL, 475, NULL, NULL, NULL, NULL, NULL),
Dinner = list(1390, 1360, 1285, 1270, 1390, NULL, 1140, 1140,
1130, 1135, 1130, NULL, 1165, 1140, 1130, 1135, 1130,
1140, 1140, 1180, NULL, 1145, 1135, 1140, 1135, 1160,
1140, 1140, NULL, 1140, NULL)), row.names = c(NA, -31L
), class = c("tbl_df", "tbl", "data.frame"))
如果您希望将条目保留为 length-one 列表,您可以这样做:
# For every column, keep the entries that hold exactly one value and replace
# all others (the NULLs) with the column median. List-columns stay lists of
# length-one entries.
pivot_table_1[] <- lapply(pivot_table_1, function(col) {
  is_scalar <- lengths(col) == 1
  # unlist() drops the NULL entries, so the median is taken over the values
  # that are actually present.
  ifelse(is_scalar, col, list(median(unlist(col))))
})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <list> <list> <list> <list>
#> 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> # ... with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>
或者,如果您希望它们作为数字列,请执行以下操作:
# Same median fill as the list-preserving variant, but each column is then
# flattened with unlist() so it is stored as a plain vector.
pivot_table_1[] <- lapply(pivot_table_1, function(col) {
  is_scalar <- lengths(col) == 1
  filled <- ifelse(is_scalar, col, list(median(unlist(col))))
  unlist(filled)
})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 440 490 1005
#> 2 2 20 440 490 1005
#> 3 3 35 440 490 1005
#> 4 4 40 440 490 1005
#> 5 5 50 440 0 965
#> 6 6 0 440 0 965
#> 7 7 40 440 490 965
#> 8 8 0 440 490 965
#> 9 9 0 440 490 965
#> 10 10 40 440 490 965
#> # ... with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>
由 reprex package (v2.0.1) 于 2022-05-22 创建
tidyr 中的 replace_na() 可用于替换 list-column 中的 NULL。(在 list-column 中,NULL 相当于 NA。)
library(tidyverse)
# For each list-column build a one-element list holding the median of its
# non-NULL entries; replace_na() then uses that value for the NULL entries of
# the column with the same name.
replace_na(
  df,
  map(keep(df, is.list), function(col) list(median(unlist(col))))
)
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <list> <list> <list> <list> <list> <list>
# 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# # … with 21 more rows
如果你想把这些 list-column 展平,可以使用 unnest():
# Fill the NULL entries of every list-column with that column's median, then
# unnest the list-columns.
df %>%
  replace_na(
    map(keep(df, is.list), function(col) list(median(unlist(col))))
  ) %>%
  unnest(where(is.list))
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0 440 490 1005 690 1390
# 2 2 20 440 490 1005 645 1360
# 3 3 35 440 490 1005 615 1285
# 4 4 40 440 490 1005 540 1270
# 5 5 50 440 0 965 540 1390
# 6 6 0 440 0 965 540 1140
# 7 7 40 440 490 965 540 1140
# 8 8 0 440 490 965 540 1140
# 9 9 0 440 490 965 540 1130
# 10 10 40 440 490 965 540 1135
我有一个包含多个 NULL 值的数据框,其中各列的 class 类型是 LIST 而不是 NUMERIC。是否可以用列的中位数替换所有 NULL 值?我尝试了一种手动方法,即使用 as.numeric() 函数将其中一列的 NULL 值更改为 0,然后应用 median() 函数。有没有更有效的方法来做到这一点?
# Manual first attempt: flag the NULL entries of the `Start Working`
# list-column and overwrite them with 0.
# vapply() is used instead of sapply() so the mask is always a logical vector;
# on an empty column sapply() returns list(), which is not a valid subscript.
i1 <- vapply(pivot_table_1$`Start Working`, is.null, logical(1))
pivot_table_1$`Start Working`[i1] <- 0
dput() 的输出:
structure(list(Day = 1:31, `Start Sleeping` = list(0, 20, 35,
40, 50, 0, 40, 0, 0, 40, 50, 0, 0, 40, 0, 40, 35, 45, 0,
0, 65, 35, 40, 40, 0, 50, 40, 0, 0, 0, 0), `Stop Sleeping` = list(
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440), `Start Working` = list(
490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0, 0,
490, 490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0,
0, 490, 490, 490, 490), `Stop Working` = list(1005, 1005,
1005, 1005, NULL, NULL, 965, 965, 965, 965, 965, NULL, NULL,
965, 965, 965, 965, 965, NULL, NULL, 965, 965, 965, 965,
965, NULL, NULL, 965, 965, 965, 965), Breakfast = list(690,
645, 615, 540, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
475, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 475, NULL,
NULL, NULL, NULL, NULL, 475, NULL, NULL, NULL, NULL, NULL),
Dinner = list(1390, 1360, 1285, 1270, 1390, NULL, 1140, 1140,
1130, 1135, 1130, NULL, 1165, 1140, 1130, 1135, 1130,
1140, 1140, 1180, NULL, 1145, 1135, 1140, 1135, 1160,
1140, 1140, NULL, 1140, NULL)), row.names = c(NA, -31L
), class = c("tbl_df", "tbl", "data.frame"))
如果您希望将条目保留为 length-one 列表,您可以这样做:
# For every column, keep the entries that hold exactly one value and replace
# all others (the NULLs) with the column median. List-columns stay lists of
# length-one entries.
pivot_table_1[] <- lapply(pivot_table_1, function(col) {
  is_scalar <- lengths(col) == 1
  # unlist() drops the NULL entries, so the median is taken over the values
  # that are actually present.
  ifelse(is_scalar, col, list(median(unlist(col))))
})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <list> <list> <list> <list>
#> 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> # ... with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>
或者,如果您希望它们作为数字列,请执行以下操作:
# Same median fill as the list-preserving variant, but each column is then
# flattened with unlist() so it is stored as a plain vector.
pivot_table_1[] <- lapply(pivot_table_1, function(col) {
  is_scalar <- lengths(col) == 1
  filled <- ifelse(is_scalar, col, list(median(unlist(col))))
  unlist(filled)
})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 440 490 1005
#> 2 2 20 440 490 1005
#> 3 3 35 440 490 1005
#> 4 4 40 440 490 1005
#> 5 5 50 440 0 965
#> 6 6 0 440 0 965
#> 7 7 40 440 490 965
#> 8 8 0 440 490 965
#> 9 9 0 440 490 965
#> 10 10 40 440 490 965
#> # ... with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>
由 reprex package (v2.0.1) 于 2022-05-22 创建
tidyr 中的 replace_na() 可用于替换 list-column 中的 NULL。(在 list-column 中,NULL 相当于 NA。)
library(tidyverse)
# For each list-column build a one-element list holding the median of its
# non-NULL entries; replace_na() then uses that value for the NULL entries of
# the column with the same name.
replace_na(
  df,
  map(keep(df, is.list), function(col) list(median(unlist(col))))
)
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <list> <list> <list> <list> <list> <list>
# 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# # … with 21 more rows
如果你想把这些 list-column 展平,可以使用 unnest():
# Fill the NULL entries of every list-column with that column's median, then
# unnest the list-columns.
df %>%
  replace_na(
    map(keep(df, is.list), function(col) list(median(unlist(col))))
  ) %>%
  unnest(where(is.list))
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0 440 490 1005 690 1390
# 2 2 20 440 490 1005 645 1360
# 3 3 35 440 490 1005 615 1285
# 4 4 40 440 490 1005 540 1270
# 5 5 50 440 0 965 540 1390
# 6 6 0 440 0 965 540 1140
# 7 7 40 440 490 965 540 1140
# 8 8 0 440 490 965 540 1140
# 9 9 0 440 490 965 540 1130
# 10 10 40 440 490 965 540 1135