在 dplyr 中展平含有 NA 的 tibble
flatten a tibble in dplyr with NAs
我有以下tibble数据,
# Example data: three issues (`mainId`), each with a list of GitHub labels.
# The third issue has no labels and carries a bare NA_character_ instead.
h <- local({
  # The three distinct labels, defined once and reused across rows.
  per_sviluppatori <- list(
    id = 431676528L,
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori",
    name = "per sviluppatori",
    color = "d4c5f9",
    default = FALSE
  )
  sito_principale <- list(
    id = 442034204L,
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale",
    name = "sito principale",
    color = "5319e7",
    default = FALSE
  )
  mappa <- list(
    id = 442051239L,
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa",
    name = "mappa",
    color = "0052cc",
    default = FALSE
  )

  structure(
    list(
      label = list(
        list(per_sviluppatori, sito_principale),
        list(mappa, per_sviluppatori, sito_principale),
        list(NA_character_)
      ),
      mainId = c("216226960", "215647494", "242390063")
    ),
    row.names = c(NA, -3L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
我想将 label 中的值与 mainId 配对,这样就可以把 label 的每个 sub-element 与其对应的 mainId 关联起来。我希望得到一个列标题为 label、url、name、color、mainId 的 tibble。
下面的解决方案通常工作正常,但当 NA 嵌套在 label 的 sub-element 中时会报错:
# Attempted call from the question; it aborts with the error shown on the
# next line.
map_df(h, flatten_dfr)
Error in bind_rows_(x, .id) : Argument 1 must have names
您可以先过滤掉缺少 label 的 mainId,然后用 full_join 将它们重新添加回来(如果您的 mainId 是唯一的,也可以直接使用 bind_rows)。
library(tidyverse)
# Rows whose `label` holds nothing but NA: keep only their mainId so they can
# be re-attached once the remaining rows have been expanded.
h_label_missing <- h %>%
  filter(map_lgl(label, ~ all(is.na(.x)))) %>%
  select(-label)

# Expand the rows that do carry labels into one row per label, then join the
# label-less mainIds back in; their label columns are filled with NA.
h %>%
  filter(!map_lgl(label, ~ all(is.na(.x)))) %>%
  # Each `label` entry is a list of named lists; bind_rows() turns it into a
  # data frame with one row per label.
  mutate(label = map(label, bind_rows)) %>%
  # Name the column explicitly: a bare unnest() is deprecated since
  # tidyr 1.0.0 and emits a warning.
  unnest(label) %>%
  # Keep mainId first wherever unnest() placed the expanded columns (the
  # placement differs between tidyr versions).
  select(mainId, everything()) %>%
  full_join(h_label_missing, by = "mainId")
# A tibble: 6 x 6
# mainId id url name color default
# <chr> <int> <chr> <chr> <chr> <lgl>
# 1 216226960 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F
# 2 216226960 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale sito principale 5319e7 F
# 3 215647494 442051239 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa mappa 0052cc F
# 4 215647494 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F
# 5 215647494 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale sito principale 5319e7 F
# 6 242390063 NA NA NA NA NA
下面的方法把仅包含 NA_character_ 的元素替换为一个由 NA 组成的列表,该列表的名称与第一行第一个元素的名称相同。这样处理之后,bind_rows 和 unnest 就可以正常工作了。
library(tidyverse)
# Field names of a complete label, read from the first label of the first row.
nested_names <- names(pluck(h, "label", 1, 1))

h2 <- h %>%
  mutate(
    # Swap every unnamed entry (the bare NA_character_ placeholder) for a list
    # holding one NA per field, so that every entry carries the same names.
    label = map(
      label, map_if,
      ~ is.null(names(.x)),
      ~ setNames(rep(list(NA), length(nested_names)), nested_names)
    ),
    # With uniform names, each entry can be row-bound into a data frame.
    label = map(label, bind_rows)
  ) %>%
  # Name the column explicitly: a bare unnest() is deprecated since
  # tidyr 1.0.0 and emits a warning.
  unnest(label) %>%
  # Keep mainId first wherever unnest() placed the expanded columns (the
  # placement differs between tidyr versions).
  select(mainId, everything())
h2
#> # A tibble: 6 x 6
#> mainId id url name color default
#> <chr> <int> <chr> <chr> <chr> <lgl>
#> 1 216226960 431676528 https://api.github.com/repos… per svi… d4c5… FALSE
#> 2 216226960 442034204 https://api.github.com/repos… sito pr… 5319… FALSE
#> 3 215647494 442051239 https://api.github.com/repos… mappa 0052… FALSE
#> 4 215647494 431676528 https://api.github.com/repos… per svi… d4c5… FALSE
#> 5 215647494 442034204 https://api.github.com/repos… sito pr… 5319… FALSE
#> 6 242390063 NA <NA> <NA> <NA> NA
我有以下tibble数据,
# Example data: three issues (`mainId`), each with a list of GitHub labels.
# The third issue has no labels and carries a bare NA_character_ instead.
h <- local({
  # The three distinct labels, defined once and reused across rows.
  per_sviluppatori <- list(
    id = 431676528L,
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori",
    name = "per sviluppatori",
    color = "d4c5f9",
    default = FALSE
  )
  sito_principale <- list(
    id = 442034204L,
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale",
    name = "sito principale",
    color = "5319e7",
    default = FALSE
  )
  mappa <- list(
    id = 442051239L,
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa",
    name = "mappa",
    color = "0052cc",
    default = FALSE
  )

  structure(
    list(
      label = list(
        list(per_sviluppatori, sito_principale),
        list(mappa, per_sviluppatori, sito_principale),
        list(NA_character_)
      ),
      mainId = c("216226960", "215647494", "242390063")
    ),
    row.names = c(NA, -3L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
我想将 label 中的值与 mainId 配对,这样就可以把 label 的每个 sub-element 与其对应的 mainId 关联起来。我希望得到一个列标题为 label、url、name、color、mainId 的 tibble。
当 NA 嵌套在 label 的 sub-element 中时,下面的调用会报错:
# Attempted call from the question; it aborts with the error shown on the
# next line.
map_df(h, flatten_dfr)
Error in bind_rows_(x, .id) : Argument 1 must have names
您可以先过滤掉缺少 label 的 mainId,然后用 full_join 将它们重新添加回来(如果您的 mainId 是唯一的,也可以直接使用 bind_rows)。
library(tidyverse)
# Rows whose `label` holds nothing but NA: keep only their mainId so they can
# be re-attached once the remaining rows have been expanded.
h_label_missing <- h %>%
  filter(map_lgl(label, ~ all(is.na(.x)))) %>%
  select(-label)

# Expand the rows that do carry labels into one row per label, then join the
# label-less mainIds back in; their label columns are filled with NA.
h %>%
  filter(!map_lgl(label, ~ all(is.na(.x)))) %>%
  # Each `label` entry is a list of named lists; bind_rows() turns it into a
  # data frame with one row per label.
  mutate(label = map(label, bind_rows)) %>%
  # Name the column explicitly: a bare unnest() is deprecated since
  # tidyr 1.0.0 and emits a warning.
  unnest(label) %>%
  # Keep mainId first wherever unnest() placed the expanded columns (the
  # placement differs between tidyr versions).
  select(mainId, everything()) %>%
  full_join(h_label_missing, by = "mainId")
# A tibble: 6 x 6
# mainId id url name color default
# <chr> <int> <chr> <chr> <chr> <lgl>
# 1 216226960 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F
# 2 216226960 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale sito principale 5319e7 F
# 3 215647494 442051239 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa mappa 0052cc F
# 4 215647494 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F
# 5 215647494 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale sito principale 5319e7 F
# 6 242390063 NA NA NA NA NA
下面的方法把仅包含 NA_character_ 的元素替换为一个由 NA 组成的列表,该列表的名称与第一行第一个元素的名称相同。这样处理之后,bind_rows 和 unnest 就可以正常工作了。
library(tidyverse)
# Field names of a complete label, read from the first label of the first row.
nested_names <- names(pluck(h, "label", 1, 1))

h2 <- h %>%
  mutate(
    # Swap every unnamed entry (the bare NA_character_ placeholder) for a list
    # holding one NA per field, so that every entry carries the same names.
    label = map(
      label, map_if,
      ~ is.null(names(.x)),
      ~ setNames(rep(list(NA), length(nested_names)), nested_names)
    ),
    # With uniform names, each entry can be row-bound into a data frame.
    label = map(label, bind_rows)
  ) %>%
  # Name the column explicitly: a bare unnest() is deprecated since
  # tidyr 1.0.0 and emits a warning.
  unnest(label) %>%
  # Keep mainId first wherever unnest() placed the expanded columns (the
  # placement differs between tidyr versions).
  select(mainId, everything())
h2
#> # A tibble: 6 x 6
#> mainId id url name color default
#> <chr> <int> <chr> <chr> <chr> <lgl>
#> 1 216226960 431676528 https://api.github.com/repos… per svi… d4c5… FALSE
#> 2 216226960 442034204 https://api.github.com/repos… sito pr… 5319… FALSE
#> 3 215647494 442051239 https://api.github.com/repos… mappa 0052… FALSE
#> 4 215647494 431676528 https://api.github.com/repos… per svi… d4c5… FALSE
#> 5 215647494 442034204 https://api.github.com/repos… sito pr… 5319… FALSE
#> 6 242390063 NA <NA> <NA> <NA> NA