在 tibble 中取消嵌套不同深度的列表
Unnesting lists of different depth in a tibble
我有一个 tibble,它的各列中嵌套了不同深度的列表。每个列表中要么只有一个值,要么为 NULL。我该如何把这些值提取出来,得到一个每个单元格只含单个值的普通 tibble/data frame?
由于列表的嵌套深度不同,我无法让 purrr::map_* 正常工作。
我尝试过用 map2_* 来取消列表嵌套(unlist),但它无法处理 NULL 值。
如有任何帮助,我们将不胜感激。
tibble
# Example tibble (dput output): 41 rows x 16 list-columns. The first eight
# columns hold one scalar per row; the "...1" columns hold a scalar in the
# first four rows and NULL elsewhere.
# `.Names` must be built with c(...); a bare parenthesised, comma-separated
# list is a parse error.
data <- structure(list(Day = list("1", "1", "1", "1", "2", "2", "4",
"4", "3", "5", "2", "3", "2", "1", "3", "3", "5", "4", "2",
"4", "4", "4", "5", "3", "1", "3", "4", "4", "1", "3", "3",
"3", "2", "2", "1", "4", "5", "2", "3", "3", "4"), TimeOfDay = list(
10L, 11L, 13L, 14L, 16L, 15L, 8L, 20L, 7L, 13L, 8L, 14L,
12L, 12L, 15L, 9L, 11L, 12L, 13L, 15L, 13L, 11L, 9L, 17L,
15L, 13L, 14L, 7L, 4L, 8L, 16L, 11L, 9L, 11L, 9L, 9L, 10L,
10L, 10L, 12L, 10L), team = list("pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil"), median = list(605.1044, 56.6954,
221.6688, 424.6239, 95.7121, 389.2422, 236.3484, 56.1632,
103.9477, 44.6205, 68.6362, 158.2934, 52.6557, 77.3802, 111.1602,
211.4475, 396.8566, 79.3398, 94.1856, 0.381, 28.8757, 5766.7778,
319.767, 304.6234, 224.6323, 47.9941, 236.2954, 161.6516,
69.3141, 0.3363, 297.0771, 8109.1642, 494.5835, 72.0297,
14.389, 228.7122, 209.5832, 28.0984, 91.9362, 36.2796, 156.4385),
Eightyperc = list(784.9551, 7639.3023, 1158.3115, 424.6239,
169.3091, 1131.0486, 514.5908, 56.1632, 103.9477, 68.9684,
70.2621, 636.0393, 262.0507, 963.6554, 310.2544, 581.9811,
8747.5797, 79.3398, 479.218, 0.381, 20195.5093, 5766.7778,
242308.6155, 304.6234, 866.1944, 68.8479, 1180.9717,
161.6516, 69.3141, 0.3363, 1370.3561, 8493.6893, 4425.4103,
1127.0802, 240.395, 30630.0465, 974.4312, 508.2495, 91.9362,
36.2796, 754.9999), avg_duration = list(467.740033333333,
3847.99885, 615.63206, 424.6239, 132.5106, 577.7318,
393.14646, 56.1632, 103.9477, 41.4517333333333, 69.44915,
1293.8016, 157.3532, 726.271625, 220.406616666667, 296.056622222222,
4572.21815, 79.3398, 206.1527, 0.381, 5162.30205, 5766.7778,
80979.7336333333, 304.6234, 1021.4285, 57.4844857142857,
934.53983, 161.6516, 69.3141, 0.3363, 994.15694, 5539.36966666667,
2396.68795714286, 367.990935714286, 127.392, 8105.9991,
1146.64592, 418.222866666667, 91.9362, 36.2796, 377.54555),
Purpose = list("Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target"),
HourOfWeek = list(34L, 35L, 37L, 38L, 64L, 63L, 104L, 116L,
79L, 133L, 56L, 86L, 60L, 36L, 87L, 81L, 131L, 108L,
61L, 111L, 109L, 107L, 129L, 89L, 39L, 85L, 110L, 103L,
28L, 80L, 88L, 83L, 57L, 59L, 33L, 105L, 130L, 58L, 82L,
84L, 106L), Day1 = list("1", "1", "1", "1", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), TimeOfDay1 = list(
10L, 11L, 13L, 14L, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), team1 = list("pupil", "pupil",
"pupil", "pupil", NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), median1 = list(874.537, 1070.7642,
1.3133, 502.7748, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), Eightyperc1 = list(2287.3256,
2114.1153, 1.3133, 502.7748, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL), avg_duration1 = list(
1441.39973333333, 1129.34656666667, 1.3133, 502.7748,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL), Purpose1 = list("Actual", "Actual", "Actual",
"Actual", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL), HourOfWeek1 = list(34L, 35L, 37L, 38L, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)), .Names = c("Day",
"TimeOfDay", "team", "median", "Eightyperc", "avg_duration", "Purpose",
"HourOfWeek", "Day1", "TimeOfDay1", "team1", "median1", "Eightyperc1",
"avg_duration1", "Purpose1", "HourOfWeek1"), row.names = c(NA, -41L),
class = c("tbl_df", "tbl", "data.frame"))
问题在于您的 list 列中含有 NULL 条目,当您执行 unlist 时,这些条目会被丢弃。
解决方法是用 as.character 对各列重新做类型转换,这会把 NULL 变成字符串 "NULL";然后对所有数值列执行 as.numeric。这会产生一些可以忽略的警告:它们源于 "NULL" 条目无法转换为 numeric,结果变为 NA。
# Step 1: flatten every list column to character; NULL entries become the
#         string "NULL" instead of being dropped.
# Step 2: convert everything except the text label columns to numeric; the
#         "NULL" strings become NA and emit coercion warnings.
data %>%
  mutate_all(as.character) %>%
  mutate_at(
    vars(-c(team, Purpose, team1, Purpose1)),
    as.numeric
  )
## A tibble: 41 x 16
# Day TimeOfDay team median Eightyperc avg_duration Purpose HourOfWeek
# <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
# 1 1. 10. pupil 605. 785. 468. Target 34.
# 2 1. 11. pupil 56.7 7639. 3848. Target 35.
# 3 1. 13. pupil 222. 1158. 616. Target 37.
# 4 1. 14. pupil 425. 425. 425. Target 38.
# 5 2. 16. pupil 95.7 169. 133. Target 64.
# 6 2. 15. pupil 389. 1131. 578. Target 63.
# 7 4. 8. pupil 236. 515. 393. Target 104.
# 8 4. 20. pupil 56.2 56.2 56.2 Target 116.
# 9 3. 7. pupil 104. 104. 104. Target 79.
#10 5. 13. pupil 44.6 69.0 41.5 Target 133.
## ... with 31 more rows, and 8 more variables: Day1 <dbl>, TimeOfDay1 <dbl>,
## team1 <chr>, median1 <dbl>, Eightyperc1 <dbl>, avg_duration1 <dbl>,
## Purpose1 <chr>, HourOfWeek1 <dbl>
PS:您提供的示例数据会引发错误,因为缺少 c(...):.Names = ("Day", ...) 应该写成 .Names = c("Day", ...)。
我有一个 tibble,它的各列中嵌套了不同深度的列表。每个列表中要么只有一个值,要么为 NULL。我该如何把这些值提取出来,得到一个每个单元格只含单个值的普通 tibble/data frame?
由于列表的嵌套深度不同,我无法让 purrr::map_* 正常工作。
我尝试过用 map2_* 来取消列表嵌套(unlist),但它无法处理 NULL 值。
如有任何帮助,我们将不胜感激。
tibble
# Example tibble (dput output): 41 rows x 16 list-columns. The first eight
# columns hold one scalar per row; the "...1" columns hold a scalar in the
# first four rows and NULL elsewhere.
# `.Names` must be built with c(...); a bare parenthesised, comma-separated
# list is a parse error.
data <- structure(list(Day = list("1", "1", "1", "1", "2", "2", "4",
"4", "3", "5", "2", "3", "2", "1", "3", "3", "5", "4", "2",
"4", "4", "4", "5", "3", "1", "3", "4", "4", "1", "3", "3",
"3", "2", "2", "1", "4", "5", "2", "3", "3", "4"), TimeOfDay = list(
10L, 11L, 13L, 14L, 16L, 15L, 8L, 20L, 7L, 13L, 8L, 14L,
12L, 12L, 15L, 9L, 11L, 12L, 13L, 15L, 13L, 11L, 9L, 17L,
15L, 13L, 14L, 7L, 4L, 8L, 16L, 11L, 9L, 11L, 9L, 9L, 10L,
10L, 10L, 12L, 10L), team = list("pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil"), median = list(605.1044, 56.6954,
221.6688, 424.6239, 95.7121, 389.2422, 236.3484, 56.1632,
103.9477, 44.6205, 68.6362, 158.2934, 52.6557, 77.3802, 111.1602,
211.4475, 396.8566, 79.3398, 94.1856, 0.381, 28.8757, 5766.7778,
319.767, 304.6234, 224.6323, 47.9941, 236.2954, 161.6516,
69.3141, 0.3363, 297.0771, 8109.1642, 494.5835, 72.0297,
14.389, 228.7122, 209.5832, 28.0984, 91.9362, 36.2796, 156.4385),
Eightyperc = list(784.9551, 7639.3023, 1158.3115, 424.6239,
169.3091, 1131.0486, 514.5908, 56.1632, 103.9477, 68.9684,
70.2621, 636.0393, 262.0507, 963.6554, 310.2544, 581.9811,
8747.5797, 79.3398, 479.218, 0.381, 20195.5093, 5766.7778,
242308.6155, 304.6234, 866.1944, 68.8479, 1180.9717,
161.6516, 69.3141, 0.3363, 1370.3561, 8493.6893, 4425.4103,
1127.0802, 240.395, 30630.0465, 974.4312, 508.2495, 91.9362,
36.2796, 754.9999), avg_duration = list(467.740033333333,
3847.99885, 615.63206, 424.6239, 132.5106, 577.7318,
393.14646, 56.1632, 103.9477, 41.4517333333333, 69.44915,
1293.8016, 157.3532, 726.271625, 220.406616666667, 296.056622222222,
4572.21815, 79.3398, 206.1527, 0.381, 5162.30205, 5766.7778,
80979.7336333333, 304.6234, 1021.4285, 57.4844857142857,
934.53983, 161.6516, 69.3141, 0.3363, 994.15694, 5539.36966666667,
2396.68795714286, 367.990935714286, 127.392, 8105.9991,
1146.64592, 418.222866666667, 91.9362, 36.2796, 377.54555),
Purpose = list("Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target"),
HourOfWeek = list(34L, 35L, 37L, 38L, 64L, 63L, 104L, 116L,
79L, 133L, 56L, 86L, 60L, 36L, 87L, 81L, 131L, 108L,
61L, 111L, 109L, 107L, 129L, 89L, 39L, 85L, 110L, 103L,
28L, 80L, 88L, 83L, 57L, 59L, 33L, 105L, 130L, 58L, 82L,
84L, 106L), Day1 = list("1", "1", "1", "1", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), TimeOfDay1 = list(
10L, 11L, 13L, 14L, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), team1 = list("pupil", "pupil",
"pupil", "pupil", NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), median1 = list(874.537, 1070.7642,
1.3133, 502.7748, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), Eightyperc1 = list(2287.3256,
2114.1153, 1.3133, 502.7748, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL), avg_duration1 = list(
1441.39973333333, 1129.34656666667, 1.3133, 502.7748,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL), Purpose1 = list("Actual", "Actual", "Actual",
"Actual", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL), HourOfWeek1 = list(34L, 35L, 37L, 38L, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)), .Names = c("Day",
"TimeOfDay", "team", "median", "Eightyperc", "avg_duration", "Purpose",
"HourOfWeek", "Day1", "TimeOfDay1", "team1", "median1", "Eightyperc1",
"avg_duration1", "Purpose1", "HourOfWeek1"), row.names = c(NA, -41L),
class = c("tbl_df", "tbl", "data.frame"))
问题在于您的 list 列中含有 NULL 条目,当您执行 unlist 时,这些条目会被丢弃。
解决方法是用 as.character 对各列重新做类型转换,这会把 NULL 变成字符串 "NULL";然后对所有数值列执行 as.numeric。这会产生一些可以忽略的警告:它们源于 "NULL" 条目无法转换为 numeric,结果变为 NA。
# Step 1: flatten every list column to character; NULL entries become the
#         string "NULL" instead of being dropped.
# Step 2: convert everything except the text label columns to numeric; the
#         "NULL" strings become NA and emit coercion warnings.
data %>%
  mutate_all(as.character) %>%
  mutate_at(
    vars(-c(team, Purpose, team1, Purpose1)),
    as.numeric
  )
## A tibble: 41 x 16
# Day TimeOfDay team median Eightyperc avg_duration Purpose HourOfWeek
# <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
# 1 1. 10. pupil 605. 785. 468. Target 34.
# 2 1. 11. pupil 56.7 7639. 3848. Target 35.
# 3 1. 13. pupil 222. 1158. 616. Target 37.
# 4 1. 14. pupil 425. 425. 425. Target 38.
# 5 2. 16. pupil 95.7 169. 133. Target 64.
# 6 2. 15. pupil 389. 1131. 578. Target 63.
# 7 4. 8. pupil 236. 515. 393. Target 104.
# 8 4. 20. pupil 56.2 56.2 56.2 Target 116.
# 9 3. 7. pupil 104. 104. 104. Target 79.
#10 5. 13. pupil 44.6 69.0 41.5 Target 133.
## ... with 31 more rows, and 8 more variables: Day1 <dbl>, TimeOfDay1 <dbl>,
## team1 <chr>, median1 <dbl>, Eightyperc1 <dbl>, avg_duration1 <dbl>,
## Purpose1 <chr>, HourOfWeek1 <dbl>
PS:您提供的示例数据会引发错误,因为缺少 c(...):.Names = ("Day", ...) 应该写成 .Names = c("Day", ...)。