R:更复杂的 pivot_longer 参数用法
R: harder pivot_longer arguments
我有一个具有这些名称的数据框:
# One-row example data: two id columns (state, county), kids_count, and kfr
# columns named kfr_<race>_pooled_<percentile>, six of which also have a
# "_se" companion column.
df <- tibble(
  state = 1,
  county = 2,
  kfr_asian_pooled_p25 = 3,
  kfr_asian_pooled_p75 = 4,
  kfr_black_pooled_p25 = 5,
  kfr_black_pooled_p75 = 6,
  kfr_pooled_pooled_p25 = 7,
  kfr_pooled_pooled_p75 = 8,
  kfr_white_pooled_p25 = 9,
  kfr_white_pooled_p75 = 10,
  kids_count = 11,
  kfr_asian_pooled_p25_se = 12,
  kfr_asian_pooled_p75_se = 13,
  kfr_black_pooled_p25_se = 14,
  kfr_black_pooled_p75_se = 15,
  kfr_white_pooled_p25_se = 16,
  kfr_white_pooled_p75_se = 17
)
当然,实际的数据集非常大,但我想要实现的是充分发挥 pivot_longer 的全部潜力。我的意思是同时选取多个变量,把它们连同列名一起转换成长格式。
# Desired long-format result, sketched as a tribble with the columns
# state, county, kids_count, race, percentile, se and value.
# NOTE(review): `value` in the last column is a placeholder symbol that is not
# defined in this snippet, so the block is illustrative rather than runnable.
# It also assigns to `df`, so evaluating it would replace the input data.
df <- tribble(~ state, ~ county, ~ kids_count, ~ race, ~ percentile, ~ se, ~ value,
1, 2, 3, "asian", "p25", TRUE, value,
2, 3, 4, "black", "p25", TRUE, value,
3, 4, 5, "white", "p25", TRUE, value,
1, 2, 3, "asian", "p75", TRUE, value,
2, 3, 4, "black", "p75", TRUE, value,
3, 4, 5, "white", "p75", TRUE, value,
1, 2, 3, "asian", "p25", FALSE, value,
2, 3, 4, "black", "p25", FALSE, value,
3, 4, 5, "white", "p25", FALSE, value,
1, 2, 3, "asian", "p75", FALSE, value,
2, 3, 4, "black", "p75", FALSE, value,
3, 4, 5, "white", "p75", FALSE, value)
非常感谢任何帮助!
你可以这样做,如果我明白你想要什么
(首先,快速解释一下我使用的 pivot_longer()
选项)
cols
选择您要旋转的列
names_to
具有多个元素允许我们将其转换为多个列
names_prefix
允许我们删除要转换的列之间的公共前缀
names_sep
允许我们指示 names_to
中的多个名称在原始列名中是如何分隔的
# Reshape every kfr_* column into long format, splitting the column name
# (after dropping the "kfr_" prefix) on "_" into race / pooled / percentile / se.
df %>%
  pivot_longer(
    cols = starts_with("kfr"),
    names_to = c("race", "pooled", "percentile", "se"),
    names_prefix = "kfr_",
    names_sep = "_"
  ) %>%
  # The "pooled" piece is only needed to make the split line up; discard it.
  select(-pooled) %>%
  # Names without a trailing "_se" have no fourth piece, so `se` is NA there;
  # turn that into a logical flag.
  mutate(se = !is.na(se))
# A tibble: 14 x 7
state county kids_count race percentile se value
<dbl> <dbl> <dbl> <chr> <chr> <lgl> <dbl>
1 1 2 11 asian p25 FALSE 3
2 1 2 11 asian p75 FALSE 4
3 1 2 11 black p25 FALSE 5
4 1 2 11 black p75 FALSE 6
5 1 2 11 pooled p25 FALSE 7
6 1 2 11 pooled p75 FALSE 8
7 1 2 11 white p25 FALSE 9
8 1 2 11 white p75 FALSE 10
9 1 2 11 asian p25 TRUE 12
10 1 2 11 asian p75 TRUE 13
11 1 2 11 black p25 TRUE 14
12 1 2 11 black p75 TRUE 15
13 1 2 11 white p25 TRUE 16
14 1 2 11 white p75 TRUE 17
看起来您正在尝试 pivot_longer
仅百分位列,然后拆分名称以创建单独的列(如果我理解正确的话)。 tidyselect
函数(例如 matches
)对于选择名称相似的列非常有用。我还使用了 stringr
包中的 str_split
来帮助创建新的字符列。如果顺序对您很重要,您可以随时使用 select
或 relocate
重新组织列。
library(tidyverse)
library(stringr)
# Reshape every kfr percentile column (the estimates and their "_se"
# companions) into long format, then derive `se`, `race` and `percentile`
# from the original column name.
df_long <-
  df %>%
  # The previous pattern "[p25]|[p75]" was a pair of character classes and
  # matched any column whose name merely contains "p", "2", "5" or "7".
  # Anchor on the actual "_p25" / "_p75" token, optionally followed by "_se".
  pivot_longer(
    cols = matches("_p(25|75)(_se)?$"),
    names_to = "percentile"
  ) %>%
  mutate(
    # Standard-error columns are the ones whose name ends in "_se".
    se = endsWith(percentile, "_se"),
    # Names look like kfr_<race>_pooled_<percentile>[_se]: piece 2 is the race
    # and piece 4 the percentile. `percentile` is overwritten last because the
    # expressions above still need the full column name.
    race = stringr::str_split(percentile, "_", simplify = TRUE)[, 2],
    percentile = stringr::str_split(percentile, "_", simplify = TRUE)[, 4]
  )

df_long
#> # A tibble: 14 x 7
#> state county kids_count percentile value se race
#> <dbl> <dbl> <dbl> <chr> <dbl> <lgl> <chr>
#> 1 1 2 11 p25 3 FALSE asian
#> 2 1 2 11 p75 4 FALSE asian
#> 3 1 2 11 p25 5 FALSE black
#> 4 1 2 11 p75 6 FALSE black
#> 5 1 2 11 p25 7 FALSE pooled
#> 6 1 2 11 p75 8 FALSE pooled
#> 7 1 2 11 p25 9 FALSE white
#> 8 1 2 11 p75 10 FALSE white
#> 9 1 2 11 p25 12 TRUE asian
#> 10 1 2 11 p75 13 TRUE asian
#> 11 1 2 11 p25 14 TRUE black
#> 12 1 2 11 p75 15 TRUE black
#> 13 1 2 11 p25 16 TRUE white
#> 14 1 2 11 p75 17 TRUE white
由 reprex package (v2.0.1)
于 2022-02-18 创建
使用 separate()
的选项。
library(tidyverse)
# Pivot everything except the identifier columns, then split the former column
# names (kfr_<race>_pooled_<percentile>[_se]) on "_". NA entries in `into`
# drop the "kfr" and "pooled" pieces.
df %>%
  pivot_longer(cols = -c("state", "county", "kids_count")) %>%
  separate(
    col = name,
    into = c(NA, "race", NA, "percentile", "se"),
    sep = "_",
    # Names without "_se" have only four pieces; pad on the right with NA
    # explicitly instead of relying on the default, which warns.
    fill = "right"
  ) %>%
  # `se` is NA when the "_se" suffix was absent; convert to a logical flag.
  mutate(se = !is.na(se))
# # A tibble: 14 x 7
# state county kids_count race percentile se value
# <dbl> <dbl> <dbl> <chr> <chr> <lgl> <dbl>
# 1 1 2 11 asian p25 FALSE 3
# 2 1 2 11 asian p75 FALSE 4
# 3 1 2 11 black p25 FALSE 5
# 4 1 2 11 black p75 FALSE 6
# 5 1 2 11 pooled p25 FALSE 7
# 6 1 2 11 pooled p75 FALSE 8
# 7 1 2 11 white p25 FALSE 9
# 8 1 2 11 white p75 FALSE 10
# 9 1 2 11 asian p25 TRUE 12
# 10 1 2 11 asian p75 TRUE 13
# 11 1 2 11 black p25 TRUE 14
# 12 1 2 11 black p75 TRUE 15
# 13 1 2 11 white p25 TRUE 16
# 14 1 2 11 white p75 TRUE 17
我们可以结合 tidyr
的 pivot_longer
和 splitstackshape
的 cSplit
library(splitstackshape)
library(dplyr)
library(tidyr)
# Pivot everything except the identifier columns, then let cSplit() break the
# former column names on "_" into name_1, name_2, ... columns.
df %>%
  pivot_longer(cols = -c(state, county, kids_count)) %>%
  cSplit(splitCols = "name", sep = "_") %>%
  # Keep only the informative pieces: piece 2 is the race, piece 4 the
  # percentile, and piece 5 is "se" or NA.
  select(
    state, county, kids_count,
    race = name_2,
    percentile = name_4,
    se = name_5,
    value
  )
state county kids_count race percentile se value
1: 1 2 11 asian p25 <NA> 3
2: 1 2 11 asian p75 <NA> 4
3: 1 2 11 black p25 <NA> 5
4: 1 2 11 black p75 <NA> 6
5: 1 2 11 pooled p25 <NA> 7
6: 1 2 11 pooled p75 <NA> 8
7: 1 2 11 white p25 <NA> 9
8: 1 2 11 white p75 <NA> 10
9: 1 2 11 asian p25 se 12
10: 1 2 11 asian p75 se 13
11: 1 2 11 black p25 se 14
12: 1 2 11 black p75 se 15
13: 1 2 11 white p25 se 16
14: 1 2 11 white p75 se 17
我有一个具有这些名称的数据框:
# One-row example data: two id columns (state, county), kids_count, and kfr
# columns named kfr_<race>_pooled_<percentile>, six of which also have a
# "_se" companion column.
df <- tibble(
  state = 1,
  county = 2,
  kfr_asian_pooled_p25 = 3,
  kfr_asian_pooled_p75 = 4,
  kfr_black_pooled_p25 = 5,
  kfr_black_pooled_p75 = 6,
  kfr_pooled_pooled_p25 = 7,
  kfr_pooled_pooled_p75 = 8,
  kfr_white_pooled_p25 = 9,
  kfr_white_pooled_p75 = 10,
  kids_count = 11,
  kfr_asian_pooled_p25_se = 12,
  kfr_asian_pooled_p75_se = 13,
  kfr_black_pooled_p25_se = 14,
  kfr_black_pooled_p75_se = 15,
  kfr_white_pooled_p25_se = 16,
  kfr_white_pooled_p75_se = 17
)
当然,实际的数据集非常大,但我想要实现的是充分发挥 pivot_longer 的全部潜力。我的意思是同时选取多个变量,把它们连同列名一起转换成长格式。
# Desired long-format result, sketched as a tribble with the columns
# state, county, kids_count, race, percentile, se and value.
# NOTE(review): `value` in the last column is a placeholder symbol that is not
# defined in this snippet, so the block is illustrative rather than runnable.
# It also assigns to `df`, so evaluating it would replace the input data.
df <- tribble(~ state, ~ county, ~ kids_count, ~ race, ~ percentile, ~ se, ~ value,
1, 2, 3, "asian", "p25", TRUE, value,
2, 3, 4, "black", "p25", TRUE, value,
3, 4, 5, "white", "p25", TRUE, value,
1, 2, 3, "asian", "p75", TRUE, value,
2, 3, 4, "black", "p75", TRUE, value,
3, 4, 5, "white", "p75", TRUE, value,
1, 2, 3, "asian", "p25", FALSE, value,
2, 3, 4, "black", "p25", FALSE, value,
3, 4, 5, "white", "p25", FALSE, value,
1, 2, 3, "asian", "p75", FALSE, value,
2, 3, 4, "black", "p75", FALSE, value,
3, 4, 5, "white", "p75", FALSE, value)
非常感谢任何帮助!
你可以这样做,如果我明白你想要什么
(首先,快速解释一下我使用的 pivot_longer()
选项)
cols
选择您要旋转的列
names_to
具有多个元素允许我们将其转换为多个列
names_prefix
允许我们删除要转换的列之间的公共前缀
names_sep
允许我们指示 names_to
中的多个名称在原始列名中是如何分隔的
# Reshape every kfr_* column into long format, splitting the column name
# (after dropping the "kfr_" prefix) on "_" into race / pooled / percentile / se.
df %>%
  pivot_longer(
    cols = starts_with("kfr"),
    names_to = c("race", "pooled", "percentile", "se"),
    names_prefix = "kfr_",
    names_sep = "_"
  ) %>%
  # The "pooled" piece is only needed to make the split line up; discard it.
  select(-pooled) %>%
  # Names without a trailing "_se" have no fourth piece, so `se` is NA there;
  # turn that into a logical flag.
  mutate(se = !is.na(se))
# A tibble: 14 x 7
state county kids_count race percentile se value
<dbl> <dbl> <dbl> <chr> <chr> <lgl> <dbl>
1 1 2 11 asian p25 FALSE 3
2 1 2 11 asian p75 FALSE 4
3 1 2 11 black p25 FALSE 5
4 1 2 11 black p75 FALSE 6
5 1 2 11 pooled p25 FALSE 7
6 1 2 11 pooled p75 FALSE 8
7 1 2 11 white p25 FALSE 9
8 1 2 11 white p75 FALSE 10
9 1 2 11 asian p25 TRUE 12
10 1 2 11 asian p75 TRUE 13
11 1 2 11 black p25 TRUE 14
12 1 2 11 black p75 TRUE 15
13 1 2 11 white p25 TRUE 16
14 1 2 11 white p75 TRUE 17
看起来您正在尝试 pivot_longer
仅百分位列,然后拆分名称以创建单独的列(如果我理解正确的话)。 tidyselect
函数(例如 matches
)对于选择名称相似的列非常有用。我还使用了 stringr
包中的 str_split
来帮助创建新的字符列。如果顺序对您很重要,您可以随时使用 select
或 relocate
重新组织列。
library(tidyverse)
library(stringr)
# Reshape every kfr percentile column (the estimates and their "_se"
# companions) into long format, then derive `se`, `race` and `percentile`
# from the original column name.
df_long <-
  df %>%
  # The previous pattern "[p25]|[p75]" was a pair of character classes and
  # matched any column whose name merely contains "p", "2", "5" or "7".
  # Anchor on the actual "_p25" / "_p75" token, optionally followed by "_se".
  pivot_longer(
    cols = matches("_p(25|75)(_se)?$"),
    names_to = "percentile"
  ) %>%
  mutate(
    # Standard-error columns are the ones whose name ends in "_se".
    se = endsWith(percentile, "_se"),
    # Names look like kfr_<race>_pooled_<percentile>[_se]: piece 2 is the race
    # and piece 4 the percentile. `percentile` is overwritten last because the
    # expressions above still need the full column name.
    race = stringr::str_split(percentile, "_", simplify = TRUE)[, 2],
    percentile = stringr::str_split(percentile, "_", simplify = TRUE)[, 4]
  )

df_long
#> # A tibble: 14 x 7
#> state county kids_count percentile value se race
#> <dbl> <dbl> <dbl> <chr> <dbl> <lgl> <chr>
#> 1 1 2 11 p25 3 FALSE asian
#> 2 1 2 11 p75 4 FALSE asian
#> 3 1 2 11 p25 5 FALSE black
#> 4 1 2 11 p75 6 FALSE black
#> 5 1 2 11 p25 7 FALSE pooled
#> 6 1 2 11 p75 8 FALSE pooled
#> 7 1 2 11 p25 9 FALSE white
#> 8 1 2 11 p75 10 FALSE white
#> 9 1 2 11 p25 12 TRUE asian
#> 10 1 2 11 p75 13 TRUE asian
#> 11 1 2 11 p25 14 TRUE black
#> 12 1 2 11 p75 15 TRUE black
#> 13 1 2 11 p25 16 TRUE white
#> 14 1 2 11 p75 17 TRUE white
由 reprex package (v2.0.1)
于 2022-02-18 创建
使用 separate()
的选项。
library(tidyverse)
# Pivot everything except the identifier columns, then split the former column
# names (kfr_<race>_pooled_<percentile>[_se]) on "_". NA entries in `into`
# drop the "kfr" and "pooled" pieces.
df %>%
  pivot_longer(cols = -c("state", "county", "kids_count")) %>%
  separate(
    col = name,
    into = c(NA, "race", NA, "percentile", "se"),
    sep = "_",
    # Names without "_se" have only four pieces; pad on the right with NA
    # explicitly instead of relying on the default, which warns.
    fill = "right"
  ) %>%
  # `se` is NA when the "_se" suffix was absent; convert to a logical flag.
  mutate(se = !is.na(se))
# # A tibble: 14 x 7
# state county kids_count race percentile se value
# <dbl> <dbl> <dbl> <chr> <chr> <lgl> <dbl>
# 1 1 2 11 asian p25 FALSE 3
# 2 1 2 11 asian p75 FALSE 4
# 3 1 2 11 black p25 FALSE 5
# 4 1 2 11 black p75 FALSE 6
# 5 1 2 11 pooled p25 FALSE 7
# 6 1 2 11 pooled p75 FALSE 8
# 7 1 2 11 white p25 FALSE 9
# 8 1 2 11 white p75 FALSE 10
# 9 1 2 11 asian p25 TRUE 12
# 10 1 2 11 asian p75 TRUE 13
# 11 1 2 11 black p25 TRUE 14
# 12 1 2 11 black p75 TRUE 15
# 13 1 2 11 white p25 TRUE 16
# 14 1 2 11 white p75 TRUE 17
我们可以结合 tidyr
的 pivot_longer
和 splitstackshape
的 cSplit
library(splitstackshape)
library(dplyr)
library(tidyr)
# Pivot everything except the identifier columns, then let cSplit() break the
# former column names on "_" into name_1, name_2, ... columns.
df %>%
  pivot_longer(cols = -c(state, county, kids_count)) %>%
  cSplit(splitCols = "name", sep = "_") %>%
  # Keep only the informative pieces: piece 2 is the race, piece 4 the
  # percentile, and piece 5 is "se" or NA.
  select(
    state, county, kids_count,
    race = name_2,
    percentile = name_4,
    se = name_5,
    value
  )
state county kids_count race percentile se value
1: 1 2 11 asian p25 <NA> 3
2: 1 2 11 asian p75 <NA> 4
3: 1 2 11 black p25 <NA> 5
4: 1 2 11 black p75 <NA> 6
5: 1 2 11 pooled p25 <NA> 7
6: 1 2 11 pooled p75 <NA> 8
7: 1 2 11 white p25 <NA> 9
8: 1 2 11 white p75 <NA> 10
9: 1 2 11 asian p25 se 12
10: 1 2 11 asian p75 se 13
11: 1 2 11 black p25 se 14
12: 1 2 11 black p75 se 15
13: 1 2 11 white p25 se 16
14: 1 2 11 white p75 se 17