R 中的 pivot_*() 函数
pivot_*() function in R
我正在尝试创建一个 tibble,其中包含 property_id 列,以及每种设施各占一列、取值为 TRUE 或 FALSE 的设施列。我尝试了以下代码,但无法继续进行。
# Attempt: keep only the id and the comma-separated facilities string, then
# split that string on commas into nine positional columns (f1 .. f9).
properties %>%
  select(property_id, facilities) %>%
  separate(col = facilities, into = paste0("f", seq_len(9)), sep = ",")
> dput(head(properties))
structure(list(property_id = c(2668, 4656, 4563, 4088, 2188,
4171), destination = c("Brisbane", "Brisbane", "Brisbane", "Brisbane",
"Brisbane", "Brisbane"), property_type = c("Hotel", "Hotel",
"Apartment", "Apartment", "Apartment", "Apartment"), nr_rooms = c(32,
39, 9, 9, 4, 5), facilities = c("airport shuttle,free wifi,garden,breakfast,pool,on-site restaurant",
"on-site restaurant,pool,airport shuttle,breakfast,bbq,free wifi,spa",
"laundry", "kitchen,laundry,free wifi", "parking,kitchen,bbq,free wifi,game console",
"kitchen,pool,laundry,parking,free wifi,garden")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
我们按逗号(`,`)拆分 'facilities' 列,使用 qdapTools 中的 mtabulate 来获取 list 中每个唯一元素的计数,将其转换为逻辑矩阵(`> 0`),再用 cbind 与 'property_id' 列合并。
library(qdapTools)

# Split every facilities string on commas, tabulate the pieces per property
# with mtabulate(), and turn the counts into TRUE/FALSE flags via `> 0`.
# The resulting logical matrix is then bound next to the property_id column.
cbind(
  properties["property_id"],
  mtabulate(strsplit(properties$facilities, split = ",")) > 0
)
-输出
property_id airport shuttle bbq breakfast free wifi game console garden kitchen laundry on-site restaurant parking pool spa
1 2668 TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE
2 4656 TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE
3 4563 FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 4088 FALSE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
5 2188 FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
6 4171 FALSE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
>
在 tidyr 中,我们可以使用 separate_rows 和 pivot_wider:
suppressMessages(library(tidyverse))

# Sample data: six Brisbane properties, each with a comma-separated
# `facilities` string.
properties <- structure(
  list(
    property_id = c(2668, 4656, 4563, 4088, 2188, 4171),
    destination = c(
      "Brisbane", "Brisbane", "Brisbane", "Brisbane", "Brisbane", "Brisbane"
    ),
    property_type = c(
      "Hotel", "Hotel", "Apartment", "Apartment", "Apartment", "Apartment"
    ),
    nr_rooms = c(32, 39, 9, 9, 4, 5),
    facilities = c(
      "airport shuttle,free wifi,garden,breakfast,pool,on-site restaurant",
      "on-site restaurant,pool,airport shuttle,breakfast,bbq,free wifi,spa",
      "laundry",
      "kitchen,laundry,free wifi",
      "parking,kitchen,bbq,free wifi,game console",
      "kitchen,pool,laundry,parking,free wifi,garden"
    )
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Build one row per (property, facility) pair, mark each pair as present with
# an explicit TRUE flag, then spread the facilities into columns.
# - values_fn = any collapses a facility listed more than once for the same
#   property into a single TRUE instead of producing a list-column.
# - values_fill = FALSE fills combinations that never occur, so every
#   facility column is logical without relying on a numeric-to-logical cast.
# Columns not needed in the result can be dropped from `id_cols`.
properties %>%
  separate_rows(facilities, sep = ",") %>%
  mutate(has_facility = TRUE) %>%
  pivot_wider(
    id_cols = c(property_id, destination, property_type),
    names_from = facilities,
    values_from = has_facility,
    values_fn = any,
    values_fill = FALSE
  )
#> # A tibble: 6 x 15
#> property_id destination property_type `airport shuttle` `free wifi` garden
#> <dbl> <chr> <chr> <lgl> <lgl> <lgl>
#> 1 2668 Brisbane Hotel TRUE TRUE TRUE
#> 2 4656 Brisbane Hotel TRUE TRUE FALSE
#> 3 4563 Brisbane Apartment FALSE FALSE FALSE
#> 4 4088 Brisbane Apartment FALSE TRUE FALSE
#> 5 2188 Brisbane Apartment FALSE TRUE FALSE
#> 6 4171 Brisbane Apartment FALSE TRUE TRUE
#> # ... with 9 more variables: breakfast <lgl>, pool <lgl>,
#> # on-site restaurant <lgl>, bbq <lgl>, spa <lgl>, laundry <lgl>,
#> # kitchen <lgl>, parking <lgl>, game console <lgl>
由 reprex package (v2.0.0)
于 2021-05-01 创建
不用说,可以从 pivot_wider 的 id_cols 参数中删除不需要的列。
我正在尝试创建一个 tibble,其中包含 property_id 列,以及每种设施各占一列、取值为 TRUE 或 FALSE 的设施列。我尝试了以下代码,但无法继续进行。
# Attempt: keep only the id and the comma-separated facilities string, then
# split that string on commas into nine positional columns (f1 .. f9).
properties %>%
  select(property_id, facilities) %>%
  separate(col = facilities, into = paste0("f", seq_len(9)), sep = ",")
> dput(head(properties))
structure(list(property_id = c(2668, 4656, 4563, 4088, 2188,
4171), destination = c("Brisbane", "Brisbane", "Brisbane", "Brisbane",
"Brisbane", "Brisbane"), property_type = c("Hotel", "Hotel",
"Apartment", "Apartment", "Apartment", "Apartment"), nr_rooms = c(32,
39, 9, 9, 4, 5), facilities = c("airport shuttle,free wifi,garden,breakfast,pool,on-site restaurant",
"on-site restaurant,pool,airport shuttle,breakfast,bbq,free wifi,spa",
"laundry", "kitchen,laundry,free wifi", "parking,kitchen,bbq,free wifi,game console",
"kitchen,pool,laundry,parking,free wifi,garden")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
我们按逗号(`,`)拆分 'facilities' 列,使用 qdapTools 中的 mtabulate 来获取 list 中每个唯一元素的计数,将其转换为逻辑矩阵(`> 0`),再用 cbind 与 'property_id' 列合并。
library(qdapTools)

# Split every facilities string on commas, tabulate the pieces per property
# with mtabulate(), and turn the counts into TRUE/FALSE flags via `> 0`.
# The resulting logical matrix is then bound next to the property_id column.
cbind(
  properties["property_id"],
  mtabulate(strsplit(properties$facilities, split = ",")) > 0
)
-输出
property_id airport shuttle bbq breakfast free wifi game console garden kitchen laundry on-site restaurant parking pool spa
1 2668 TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE
2 4656 TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE
3 4563 FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 4088 FALSE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
5 2188 FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
6 4171 FALSE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
>
在 tidyr 中,我们可以使用 separate_rows 和 pivot_wider:
suppressMessages(library(tidyverse))

# Sample data: six Brisbane properties, each with a comma-separated
# `facilities` string.
properties <- structure(
  list(
    property_id = c(2668, 4656, 4563, 4088, 2188, 4171),
    destination = c(
      "Brisbane", "Brisbane", "Brisbane", "Brisbane", "Brisbane", "Brisbane"
    ),
    property_type = c(
      "Hotel", "Hotel", "Apartment", "Apartment", "Apartment", "Apartment"
    ),
    nr_rooms = c(32, 39, 9, 9, 4, 5),
    facilities = c(
      "airport shuttle,free wifi,garden,breakfast,pool,on-site restaurant",
      "on-site restaurant,pool,airport shuttle,breakfast,bbq,free wifi,spa",
      "laundry",
      "kitchen,laundry,free wifi",
      "parking,kitchen,bbq,free wifi,game console",
      "kitchen,pool,laundry,parking,free wifi,garden"
    )
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Build one row per (property, facility) pair, mark each pair as present with
# an explicit TRUE flag, then spread the facilities into columns.
# - values_fn = any collapses a facility listed more than once for the same
#   property into a single TRUE instead of producing a list-column.
# - values_fill = FALSE fills combinations that never occur, so every
#   facility column is logical without relying on a numeric-to-logical cast.
# Columns not needed in the result can be dropped from `id_cols`.
properties %>%
  separate_rows(facilities, sep = ",") %>%
  mutate(has_facility = TRUE) %>%
  pivot_wider(
    id_cols = c(property_id, destination, property_type),
    names_from = facilities,
    values_from = has_facility,
    values_fn = any,
    values_fill = FALSE
  )
#> # A tibble: 6 x 15
#> property_id destination property_type `airport shuttle` `free wifi` garden
#> <dbl> <chr> <chr> <lgl> <lgl> <lgl>
#> 1 2668 Brisbane Hotel TRUE TRUE TRUE
#> 2 4656 Brisbane Hotel TRUE TRUE FALSE
#> 3 4563 Brisbane Apartment FALSE FALSE FALSE
#> 4 4088 Brisbane Apartment FALSE TRUE FALSE
#> 5 2188 Brisbane Apartment FALSE TRUE FALSE
#> 6 4171 Brisbane Apartment FALSE TRUE TRUE
#> # ... with 9 more variables: breakfast <lgl>, pool <lgl>,
#> # on-site restaurant <lgl>, bbq <lgl>, spa <lgl>, laundry <lgl>,
#> # kitchen <lgl>, parking <lgl>, game console <lgl>
由 reprex package (v2.0.0) 于 2021-05-01 创建
不用说,可以从 pivot_wider 的 id_cols 参数中删除不需要的列。