在 R 中将多级列表转换为数据框
Multi-level list to data frame in R
我想将下面的列表转换为数据框,但我没有成功。
该列表取自 Microsoft Azure 的 API,其中列出了所有资源类型及其技术信息(link:https://docs.microsoft.com/en-us/rest/api/compute/resource-skus/list)。该列表与此类似:
library(tidyverse)
# Sample payload: `value` holds one record per resource; each record has scalar
# fields plus a nested `capabilities` list of name/value pairs.
input <- list(
  value = list(
    list(
      resourceType = "rt1", name = "name1", tier = "tier1",
      size = "size1", family = "family1",
      capabilities = list(
        list(name = "cap_name1", value = "value1_1"),
        list(name = "cap_name2", value = "value1_2")
      )
    ),
    list(
      resourceType = "rt1", name = "name2", tier = "tier2",
      size = "size2", family = "family2",
      capabilities = list(
        list(name = "cap_name2", value = "value2_2"),
        list(name = "cap_name3", value = "value2_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name3", tier = "tier3",
      size = "size3", family = "family3",
      capabilities = list(
        list(name = "cap_name1", value = "value3_1"),
        list(name = "cap_name3", value = "value3_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name4", tier = "tier4",
      size = "size4", family = "family4",
      capabilities = list(
        list(name = "cap_name1", value = "value4_1"),
        list(name = "cap_name2", value = "value4_2"),
        list(name = "cap_name3", value = "value4_3")
      )
    ),
    # The only "rt2" record; it has no tier, size or family.
    list(
      resourceType = "rt2", name = "name5",
      capabilities = list(list(name = "cap_name4", value = "value5_5"))
    )
  )
)
# Target shape: one row per "rt1" record and one column per capability name,
# with NA where a record lacks that capability.
expected_output <- tibble(
  resourceType = rep("rt1", 4),
  name = paste0("name", 1:4),
  tier = paste0("tier", 1:4),
  size = paste0("size", 1:4),
  family = paste0("family", 1:4),
  cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
  cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
  cap_name3 = c(NA, "value2_3", "value3_3", "value4_3")
)
expected_output
#> # A tibble: 4 × 8
#> resourceType name tier size family cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 tier1 size1 family1 value1_1 value1_2 <NA>
#> 2 rt1 name2 tier2 size2 family2 <NA> value2_2 value2_3
#> 3 rt1 name3 tier3 size3 family3 value3_1 <NA> value3_3
#> 4 rt1 name4 tier4 size4 family4 value4_1 value4_2 value4_3
由 reprex package (v2.0.1)
创建于 2022-05-12
我这里有两个问题:
- 我不知道如何只筛选出 resourceType == "rt1" 的记录。
我知道如何以这种方式过滤它:
# Narrow the records under `value` to those whose resourceType is "rt1".
input %>%
  pluck("value") %>%
  keep(function(sku) sku$resourceType == "rt1")
但我希望能以某种方式省去 pluck 这一步。
- 主要问题是如何把 input 转换为 expected_output。
我找到了下面这种比较繁琐的方法,可以列出所有 capabilities 的名称:
# Collect the capability lists of every "rt1" record. Extracting the element by
# name with map() replaces the transpose() -> as_tibble() -> pull() detour.
capabilities <- input %>%
  pluck("value") %>%
  keep(~ .x$resourceType == "rt1") %>%
  map("capabilities")
# Gather the distinct capability names across those records
# (map_depth(1, ...) is simply map()).
all_capabilities_names <- capabilities %>%
  map(~ map_chr(.x, "name")) %>%
  purrr::flatten_chr() %>%
  unique()
all_capabilities_names
#> [1] "cap_name1" "cap_name2" "cap_name3"
由 reprex package (v2.0.1)
创建于 2022-05-12
我卡在了这里,因为我不知道如何把各个 value 映射到正确的列。
处理列表对我来说一直是一场噩梦。非常感谢任何帮助 :)
借助一点 purrr 式的魔法,您可以逐层深入,在各个层级上构造小的 tibble,然后再一路 unnest/reduce 回到顶层。稍作尝试:
library(tidyverse)
# Sample payload: `value` holds one record per resource; each record has scalar
# fields plus a nested `capabilities` list of name/value pairs.
input <- list(
  value = list(
    list(
      resourceType = "rt1", name = "name1", tier = "tier1",
      size = "size1", family = "family1",
      capabilities = list(
        list(name = "cap_name1", value = "value1_1"),
        list(name = "cap_name2", value = "value1_2")
      )
    ),
    list(
      resourceType = "rt1", name = "name2", tier = "tier2",
      size = "size2", family = "family2",
      capabilities = list(
        list(name = "cap_name2", value = "value2_2"),
        list(name = "cap_name3", value = "value2_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name3", tier = "tier3",
      size = "size3", family = "family3",
      capabilities = list(
        list(name = "cap_name1", value = "value3_1"),
        list(name = "cap_name3", value = "value3_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name4", tier = "tier4",
      size = "size4", family = "family4",
      capabilities = list(
        list(name = "cap_name1", value = "value4_1"),
        list(name = "cap_name2", value = "value4_2"),
        list(name = "cap_name3", value = "value4_3")
      )
    ),
    # The only "rt2" record; it has no tier, size or family.
    list(
      resourceType = "rt2", name = "name5",
      capabilities = list(list(name = "cap_name4", value = "value5_4"))
    )
  )
)
# Flatten the nested records into one row per "rt1" resource, with one column
# per capability name.
output_test <- input[[1]] %>%
  # One tibble per record; scalar fields are recycled across its capabilities.
  map(as_tibble) %>%
  # bind_rows() takes the whole list at once (no pairwise reduce) and fills
  # fields that a record lacks with NA.
  bind_rows() %>%
  filter(resourceType == "rt1") %>%
  mutate(capabilities = map(capabilities, as_tibble)) %>%
  # names_sep prefixes the nested columns (capabilities_name,
  # capabilities_value), so they cannot clash with the record-level `name` and
  # no position-dependent `name...N` repair is needed.
  unnest(capabilities, names_sep = "_") %>%
  pivot_wider(names_from = capabilities_name, values_from = capabilities_value)
output_test
#> # A tibble: 4 × 8
#> resourceType name tier size family cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 tier1 size1 family1 value1_1 value1_2 <NA>
#> 2 rt1 name2 tier2 size2 family2 <NA> value2_2 value2_3
#> 3 rt1 name3 tier3 size3 family3 value3_1 <NA> value3_3
#> 4 rt1 name4 tier4 size4 family4 value4_1 value4_2 value4_3
看看它是否有效:
# Reference result: one row per "rt1" record and one column per capability
# name, with NA where a record lacks that capability.
expected_output <- tibble(
  resourceType = rep("rt1", 4),
  name = paste0("name", 1:4),
  tier = paste0("tier", 1:4),
  size = paste0("size", 1:4),
  family = paste0("family", 1:4),
  cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
  cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
  cap_name3 = c(NA, "value2_3", "value3_3", "value4_3")
)
# Compare the reshaped result against the reference.
assertthat::are_equal(expected_output, output_test)
#> [1] TRUE
编辑 - 遇到另一个错误
如果某条记录的列表中含有空向量,则无法将其转换为 tibble。您可以先丢弃该空向量;在把所有行绑定到一起时,缺失的字段会被填充为 NA:
library(tidyverse)
# Sample payload in which the first record carries an empty `tier` vector.
input <- list(
  value = list(
    list(
      resourceType = "rt1", name = "name1", tier = character(0),
      size = "size1", family = "family1",
      capabilities = list(
        list(name = "cap_name1", value = "value1_1"),
        list(name = "cap_name2", value = "value1_2")
      )
    ),
    list(
      resourceType = "rt1", name = "name2", tier = "tier2",
      size = "size2", family = "family2",
      capabilities = list(
        list(name = "cap_name2", value = "value2_2"),
        list(name = "cap_name3", value = "value2_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name3", tier = "tier3",
      size = "size3", family = "family3",
      capabilities = list(
        list(name = "cap_name1", value = "value3_1"),
        list(name = "cap_name3", value = "value3_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name4", tier = "tier4",
      size = "size4", family = "family4",
      capabilities = list(
        list(name = "cap_name1", value = "value4_1"),
        list(name = "cap_name2", value = "value4_2"),
        list(name = "cap_name3", value = "value4_3")
      )
    ),
    # The only "rt2" record; it has no tier, size or family.
    list(
      resourceType = "rt2", name = "name5",
      capabilities = list(list(name = "cap_name4", value = "value5_4"))
    )
  )
)
# Reshape the records into one row per "rt1" resource, tolerating empty fields.
input$value %>%
  # Drop zero-length fields so that each record can be converted to a tibble.
  map(~ discard(.x, is_empty) %>% as_tibble()) %>%
  # bind_rows() takes the whole list at once (no pairwise reduce); fields
  # absent from a record become NA.
  bind_rows() %>%
  filter(resourceType == "rt1") %>%
  mutate(capabilities = map(capabilities, as_tibble)) %>%
  # Rename the record-level `name` first so it cannot collide with the
  # capability `name` column that unnest() introduces.
  rename(value_name = name) %>%
  unnest(capabilities) %>%
  pivot_wider(names_from = name, values_from = value)
#> # A tibble: 4 × 8
#> resourceType value_name size family tier cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 size1 family1 <NA> value1_1 value1_2 <NA>
#> 2 rt1 name2 size2 family2 tier2 <NA> value2_2 value2_3
#> 3 rt1 name3 size3 family3 tier3 value3_1 <NA> value3_3
#> 4 rt1 name4 size4 family4 tier4 value4_1 value4_2 value4_3
由 reprex package (v2.0.1)
创建于 2022-05-14
我想将下面的列表转换为数据框,但我没有成功。
该列表取自 Microsoft Azure 的 API,其中列出了所有资源类型及其技术信息(link:https://docs.microsoft.com/en-us/rest/api/compute/resource-skus/list)。该列表与此类似:
library(tidyverse)
# Sample payload: `value` holds one record per resource; each record has scalar
# fields plus a nested `capabilities` list of name/value pairs.
input <- list(
  value = list(
    list(
      resourceType = "rt1", name = "name1", tier = "tier1",
      size = "size1", family = "family1",
      capabilities = list(
        list(name = "cap_name1", value = "value1_1"),
        list(name = "cap_name2", value = "value1_2")
      )
    ),
    list(
      resourceType = "rt1", name = "name2", tier = "tier2",
      size = "size2", family = "family2",
      capabilities = list(
        list(name = "cap_name2", value = "value2_2"),
        list(name = "cap_name3", value = "value2_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name3", tier = "tier3",
      size = "size3", family = "family3",
      capabilities = list(
        list(name = "cap_name1", value = "value3_1"),
        list(name = "cap_name3", value = "value3_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name4", tier = "tier4",
      size = "size4", family = "family4",
      capabilities = list(
        list(name = "cap_name1", value = "value4_1"),
        list(name = "cap_name2", value = "value4_2"),
        list(name = "cap_name3", value = "value4_3")
      )
    ),
    # The only "rt2" record; it has no tier, size or family.
    list(
      resourceType = "rt2", name = "name5",
      capabilities = list(list(name = "cap_name4", value = "value5_5"))
    )
  )
)
# Target shape: one row per "rt1" record and one column per capability name,
# with NA where a record lacks that capability.
expected_output <- tibble(
  resourceType = rep("rt1", 4),
  name = paste0("name", 1:4),
  tier = paste0("tier", 1:4),
  size = paste0("size", 1:4),
  family = paste0("family", 1:4),
  cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
  cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
  cap_name3 = c(NA, "value2_3", "value3_3", "value4_3")
)
expected_output
#> # A tibble: 4 × 8
#> resourceType name tier size family cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 tier1 size1 family1 value1_1 value1_2 <NA>
#> 2 rt1 name2 tier2 size2 family2 <NA> value2_2 value2_3
#> 3 rt1 name3 tier3 size3 family3 value3_1 <NA> value3_3
#> 4 rt1 name4 tier4 size4 family4 value4_1 value4_2 value4_3
由 reprex package (v2.0.1)
创建于 2022-05-12
我这里有两个问题:
- 我不知道如何只筛选出 resourceType == "rt1" 的记录。
我知道可以用下面的方式进行筛选:
# Narrow the records under `value` to those whose resourceType is "rt1".
input %>%
  pluck("value") %>%
  keep(function(sku) sku$resourceType == "rt1")
但我希望能以某种方式省去 pluck 这一步。
- 主要问题是如何把 input 转换为 expected_output。
我找到了下面这种比较繁琐的方法,可以列出所有 capabilities 的名称:
# Collect the capability lists of every "rt1" record. Extracting the element by
# name with map() replaces the transpose() -> as_tibble() -> pull() detour.
capabilities <- input %>%
  pluck("value") %>%
  keep(~ .x$resourceType == "rt1") %>%
  map("capabilities")
# Gather the distinct capability names across those records
# (map_depth(1, ...) is simply map()).
all_capabilities_names <- capabilities %>%
  map(~ map_chr(.x, "name")) %>%
  purrr::flatten_chr() %>%
  unique()
all_capabilities_names
#> [1] "cap_name1" "cap_name2" "cap_name3"
由 reprex package (v2.0.1)
创建于 2022-05-12
我卡在了这里,因为我不知道如何把各个 value 映射到正确的列。
处理列表对我来说一直是一场噩梦。非常感谢任何帮助 :)
借助一点 purrr 式的魔法,您可以逐层深入,在各个层级上构造小的 tibble,然后再一路 unnest/reduce 回到顶层。稍作尝试:
library(tidyverse)
# Sample payload: `value` holds one record per resource; each record has scalar
# fields plus a nested `capabilities` list of name/value pairs.
input <- list(
  value = list(
    list(
      resourceType = "rt1", name = "name1", tier = "tier1",
      size = "size1", family = "family1",
      capabilities = list(
        list(name = "cap_name1", value = "value1_1"),
        list(name = "cap_name2", value = "value1_2")
      )
    ),
    list(
      resourceType = "rt1", name = "name2", tier = "tier2",
      size = "size2", family = "family2",
      capabilities = list(
        list(name = "cap_name2", value = "value2_2"),
        list(name = "cap_name3", value = "value2_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name3", tier = "tier3",
      size = "size3", family = "family3",
      capabilities = list(
        list(name = "cap_name1", value = "value3_1"),
        list(name = "cap_name3", value = "value3_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name4", tier = "tier4",
      size = "size4", family = "family4",
      capabilities = list(
        list(name = "cap_name1", value = "value4_1"),
        list(name = "cap_name2", value = "value4_2"),
        list(name = "cap_name3", value = "value4_3")
      )
    ),
    # The only "rt2" record; it has no tier, size or family.
    list(
      resourceType = "rt2", name = "name5",
      capabilities = list(list(name = "cap_name4", value = "value5_4"))
    )
  )
)
# Flatten the nested records into one row per "rt1" resource, with one column
# per capability name.
output_test <- input[[1]] %>%
  # One tibble per record; scalar fields are recycled across its capabilities.
  map(as_tibble) %>%
  # bind_rows() takes the whole list at once (no pairwise reduce) and fills
  # fields that a record lacks with NA.
  bind_rows() %>%
  filter(resourceType == "rt1") %>%
  mutate(capabilities = map(capabilities, as_tibble)) %>%
  # names_sep prefixes the nested columns (capabilities_name,
  # capabilities_value), so they cannot clash with the record-level `name` and
  # no position-dependent `name...N` repair is needed.
  unnest(capabilities, names_sep = "_") %>%
  pivot_wider(names_from = capabilities_name, values_from = capabilities_value)
output_test
#> # A tibble: 4 × 8
#> resourceType name tier size family cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 tier1 size1 family1 value1_1 value1_2 <NA>
#> 2 rt1 name2 tier2 size2 family2 <NA> value2_2 value2_3
#> 3 rt1 name3 tier3 size3 family3 value3_1 <NA> value3_3
#> 4 rt1 name4 tier4 size4 family4 value4_1 value4_2 value4_3
看看它是否有效:
# Reference result: one row per "rt1" record and one column per capability
# name, with NA where a record lacks that capability.
expected_output <- tibble(
  resourceType = rep("rt1", 4),
  name = paste0("name", 1:4),
  tier = paste0("tier", 1:4),
  size = paste0("size", 1:4),
  family = paste0("family", 1:4),
  cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
  cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
  cap_name3 = c(NA, "value2_3", "value3_3", "value4_3")
)
# Compare the reshaped result against the reference.
assertthat::are_equal(expected_output, output_test)
#> [1] TRUE
编辑 - 遇到另一个错误
如果某条记录的列表中含有空向量,则无法将其转换为 tibble。您可以先丢弃该空向量;在把所有行绑定到一起时,缺失的字段会被填充为 NA:
library(tidyverse)
# Sample payload in which the first record carries an empty `tier` vector.
input <- list(
  value = list(
    list(
      resourceType = "rt1", name = "name1", tier = character(0),
      size = "size1", family = "family1",
      capabilities = list(
        list(name = "cap_name1", value = "value1_1"),
        list(name = "cap_name2", value = "value1_2")
      )
    ),
    list(
      resourceType = "rt1", name = "name2", tier = "tier2",
      size = "size2", family = "family2",
      capabilities = list(
        list(name = "cap_name2", value = "value2_2"),
        list(name = "cap_name3", value = "value2_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name3", tier = "tier3",
      size = "size3", family = "family3",
      capabilities = list(
        list(name = "cap_name1", value = "value3_1"),
        list(name = "cap_name3", value = "value3_3")
      )
    ),
    list(
      resourceType = "rt1", name = "name4", tier = "tier4",
      size = "size4", family = "family4",
      capabilities = list(
        list(name = "cap_name1", value = "value4_1"),
        list(name = "cap_name2", value = "value4_2"),
        list(name = "cap_name3", value = "value4_3")
      )
    ),
    # The only "rt2" record; it has no tier, size or family.
    list(
      resourceType = "rt2", name = "name5",
      capabilities = list(list(name = "cap_name4", value = "value5_4"))
    )
  )
)
# Reshape the records into one row per "rt1" resource, tolerating empty fields.
input$value %>%
  # Drop zero-length fields so that each record can be converted to a tibble.
  map(~ discard(.x, is_empty) %>% as_tibble()) %>%
  # bind_rows() takes the whole list at once (no pairwise reduce); fields
  # absent from a record become NA.
  bind_rows() %>%
  filter(resourceType == "rt1") %>%
  mutate(capabilities = map(capabilities, as_tibble)) %>%
  # Rename the record-level `name` first so it cannot collide with the
  # capability `name` column that unnest() introduces.
  rename(value_name = name) %>%
  unnest(capabilities) %>%
  pivot_wider(names_from = name, values_from = value)
#> # A tibble: 4 × 8
#> resourceType value_name size family tier cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 size1 family1 <NA> value1_1 value1_2 <NA>
#> 2 rt1 name2 size2 family2 tier2 <NA> value2_2 value2_3
#> 3 rt1 name3 size3 family3 tier3 value3_1 <NA> value3_3
#> 4 rt1 name4 size4 family4 tier4 value4_1 value4_2 value4_3
由 reprex package (v2.0.1)
创建于 2022-05-14