使用 dplyr 将多列转换为长格式
pivoting multiple columns long using dplyr
我希望使用 dplyr 语法将宽格式数据中的多个列转换为长格式。
我的数据如下:
# Wide-format sample data: one row per provider with three (code, primary)
# column pairs. The repeated `primary` name is made unique by data.frame()
# (check.names = TRUE), so the columns become primary, primary.1, primary.2.
x <- data.frame(
  provider_id = c(1, 2, 3),
  code_1 = c("207ZP0102X", "208600000X", "208100000X"),
  primary = c("y", "n", "n"),
  code_2 = c("208000000X", "207ZP0102X", "208600000X"),
  primary = c("n", "n", "y"),
  code_3 = c("208100000X", "208600000X", "207ZP0102X"),
  primary = c("n", "y", "n"),
  check.names = TRUE
)
我希望转换为以下格式,但我无法找出实现此目的的 dplyr 语法。
如有任何帮助,我们将不胜感激
您可以在应用 pivot_longer 之前先重命名列:
library(dplyr)
library(tidyr)
# Rename the de-duplicated `primary*` columns so that each one carries the
# same numeric suffix as its `code_*` partner. pivot_longer() can then split
# every name on "_": the prefix becomes an output column (".value") and the
# suffix is stored in `Code`.
x %>%
  rename(
    primary_1 = primary,
    primary_2 = primary.1,
    primary_3 = primary.2
  ) %>%
  pivot_longer(
    cols = -provider_id,
    names_to = c(".value", "Code"),
    names_sep = "_"
  ) %>%
  rename(value = code) %>%
  mutate(Code = paste0("Code_", Code))
#> # A tibble: 9 × 4
#> provider_id Code value primary
#> <dbl> <chr> <chr> <chr>
#> 1 1 Code_1 207ZP0102X y
#> 2 1 Code_2 208000000X n
#> 3 1 Code_3 208100000X n
#> 4 2 Code_1 208600000X n
#> 5 2 Code_2 207ZP0102X n
#> 6 2 Code_3 208600000X y
#> 7 3 Code_1 208100000X n
#> 8 3 Code_2 208600000X y
#> 9 3 Code_3 207ZP0102X n
这里的问题主要是名称不一致。您可以编写一个函数,将以“primary”开头的列子集重命名为“primary_1”、“primary_2”……按它们出现的顺序编号。
然后,您就可以将 @stefan 提供的 pivot_longer 代码应用到包含更多或更少 (code_xx, primary.xx) 列对的表:
library(tidyr)
library(dplyr, warn.conflicts = FALSE)
#' Give every `primary*` column name a sequential numeric suffix.
#'
#' @param nms Character vector of column names.
#' @return `nms` with each element that starts with "primary" replaced by
#'   "primary_1", "primary_2", ... in order of appearance; all other
#'   elements are returned unchanged.
fix_names <- function(nms) {
  is_primary <- grepl("^primary", nms)
  # seq_len() yields an empty sequence when no name matches, whereas
  # seq(0) would produce c(1, 0).
  replace(nms, is_primary, paste0("primary_", seq_len(sum(is_primary))))
}
# Normalise the `primary*` column names with fix_names() so they follow the
# same "<name>_<n>" pattern as the `code_*` columns, then pivot: the part
# before "_" becomes an output column (".value"), the part after it goes
# into `Code`.
x %>%
  rename_with(.fn = fix_names) %>%
  pivot_longer(
    cols = -provider_id,
    names_to = c(".value", "Code"),
    names_sep = "_"
  ) %>%
  rename(value = code) %>%
  mutate(Code = paste0("Code_", Code))
#> # A tibble: 9 × 4
#> provider_id Code value primary
#> <dbl> <chr> <chr> <chr>
#> 1 1 Code_1 207ZP0102X y
#> 2 1 Code_2 208000000X n
#> 3 1 Code_3 208100000X n
#> 4 2 Code_1 208600000X n
#> 5 2 Code_2 207ZP0102X n
#> 6 2 Code_3 208600000X y
#> 7 3 Code_1 208100000X n
#> 8 3 Code_2 208600000X y
#> 9 3 Code_3 207ZP0102X n
创建于 2022-03-11,由 reprex package (v2.0.1) 生成
我希望使用 dplyr 语法将宽格式数据中的多个列转换为长格式。
我的数据如下:
# Wide-format sample data: one row per provider with three (code, primary)
# column pairs. The repeated `primary` name is made unique by data.frame()
# (check.names = TRUE), so the columns become primary, primary.1, primary.2.
x <- data.frame(
  provider_id = c(1, 2, 3),
  code_1 = c("207ZP0102X", "208600000X", "208100000X"),
  primary = c("y", "n", "n"),
  code_2 = c("208000000X", "207ZP0102X", "208600000X"),
  primary = c("n", "n", "y"),
  code_3 = c("208100000X", "208600000X", "207ZP0102X"),
  primary = c("n", "y", "n"),
  check.names = TRUE
)
我希望转换为以下格式,但我无法找出实现此目的的 dplyr 语法。
如有任何帮助,我们将不胜感激
您可以在应用 pivot_longer 之前先重命名列:
library(dplyr)
library(tidyr)
# Rename the de-duplicated `primary*` columns so that each one carries the
# same numeric suffix as its `code_*` partner. pivot_longer() can then split
# every name on "_": the prefix becomes an output column (".value") and the
# suffix is stored in `Code`.
x %>%
  rename(
    primary_1 = primary,
    primary_2 = primary.1,
    primary_3 = primary.2
  ) %>%
  pivot_longer(
    cols = -provider_id,
    names_to = c(".value", "Code"),
    names_sep = "_"
  ) %>%
  rename(value = code) %>%
  mutate(Code = paste0("Code_", Code))
#> # A tibble: 9 × 4
#> provider_id Code value primary
#> <dbl> <chr> <chr> <chr>
#> 1 1 Code_1 207ZP0102X y
#> 2 1 Code_2 208000000X n
#> 3 1 Code_3 208100000X n
#> 4 2 Code_1 208600000X n
#> 5 2 Code_2 207ZP0102X n
#> 6 2 Code_3 208600000X y
#> 7 3 Code_1 208100000X n
#> 8 3 Code_2 208600000X y
#> 9 3 Code_3 207ZP0102X n
这里的问题主要是名称不一致。您可以编写一个函数,将以“primary”开头的列子集重命名为“primary_1”、“primary_2”……按它们出现的顺序编号。
然后,您就可以将 @stefan 提供的 pivot_longer 代码应用到包含更多或更少 (code_xx, primary.xx) 列对的表:
library(tidyr)
library(dplyr, warn.conflicts = FALSE)
#' Give every `primary*` column name a sequential numeric suffix.
#'
#' @param nms Character vector of column names.
#' @return `nms` with each element that starts with "primary" replaced by
#'   "primary_1", "primary_2", ... in order of appearance; all other
#'   elements are returned unchanged.
fix_names <- function(nms) {
  is_primary <- grepl("^primary", nms)
  # seq_len() yields an empty sequence when no name matches, whereas
  # seq(0) would produce c(1, 0).
  replace(nms, is_primary, paste0("primary_", seq_len(sum(is_primary))))
}
# Normalise the `primary*` column names with fix_names() so they follow the
# same "<name>_<n>" pattern as the `code_*` columns, then pivot: the part
# before "_" becomes an output column (".value"), the part after it goes
# into `Code`.
x %>%
  rename_with(.fn = fix_names) %>%
  pivot_longer(
    cols = -provider_id,
    names_to = c(".value", "Code"),
    names_sep = "_"
  ) %>%
  rename(value = code) %>%
  mutate(Code = paste0("Code_", Code))
#> # A tibble: 9 × 4
#> provider_id Code value primary
#> <dbl> <chr> <chr> <chr>
#> 1 1 Code_1 207ZP0102X y
#> 2 1 Code_2 208000000X n
#> 3 1 Code_3 208100000X n
#> 4 2 Code_1 208600000X n
#> 5 2 Code_2 207ZP0102X n
#> 6 2 Code_3 208600000X y
#> 7 3 Code_1 208100000X n
#> 8 3 Code_2 208600000X y
#> 9 3 Code_3 207ZP0102X n
创建于 2022-03-11,由 reprex package (v2.0.1) 生成