pivot_longer 与 names_pattern 和成对的列
pivot_longer with names_pattern and pairs of columns
我试图弄清楚如何在以下示例中使用 tidyr 中的 pivot_longer。原始表名为 dat_plot,其结构如下:
year organizational_based action_based ideological_based share_org_based share_ideo_based share_act_based
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1956 1 0 0 2 95 95
2 2000 0 0 0 92 87 91
也在这里:
# Example input in wide format: one row per year, three `*_based` columns
# and three `share_*_based` columns.
dat_plot <- structure(
  list(
    year = c(1956, 2000),
    organizational_based = c(1, 0),
    action_based = c(0, 0),
    ideological_based = c(0, 0),
    share_org_based = c(2, 92),
    share_ideo_based = c(95, 87),
    share_act_based = c(95, 91)
  ),
  row.names = c(NA, -2L),
  class = c("tbl_df", "tbl", "data.frame")
)
我想把它转换成如下所示的长格式:
year based based_value share share_value
1 1956 organizational 1 org_based 2
2 1956 action 0 ideo_based 95
3 1956 ideological 0 act_based 95
4 2000 organizational 0 org_based 92
5 2000 action 0 ideo_based 87
6 2000 ideological 0 act_based 91
或者,以 dput 的形式给出期望的结果:
# Desired long-format result: for every year, one row per pair of a
# `based` category and its corresponding `share` column.
solution <- structure(
  list(
    year = c(1956, 1956, 1956, 2000, 2000, 2000),
    based = c(
      "organizational", "action", "ideological",
      "organizational", "action", "ideological"
    ),
    based_value = c(1, 0, 0, 0, 0, 0),
    share = c(
      "org_based", "ideo_based", "act_based",
      "org_based", "ideo_based", "act_based"
    ),
    share_value = c(2, 95, 95, 92, 87, 91)
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)
我想我必须使用 names_pattern。下面是我尝试过的写法,但运行后你会发现,结果并不是我想要的:
# Attempt from the question. According to the surrounding text it does NOT
# produce the desired output; it is kept unchanged for reference.
# NOTE(review): the pattern requires the literal "share_", so column names
# that do not contain it (e.g. "organizational_based") cannot match.
# NOTE(review): per the tidyr documentation, `values_to` is ignored when
# `names_to` contains ".value" -- confirm against the installed version.
pivot_longer(data=dat_plot, cols=c("share_org_based", "share_ideo_based", "share_act_based",
"organizational_based", "action_based", "ideological_based"),
names_pattern = c("(share_[A-Za-z]+)([A-Za-z]+_based)"),
names_to = c("share", ".value"),
values_to = "value")
如果有人能说明 names_pattern 的工作原理,或指出我遗漏了什么,我将不胜感激。
另一个选择是使用 sjmisc 包中的 to_long:
library(sjmisc)

# Gather two groups of columns in a single call: columns 2-4 go to the
# "based" / "value_based" pair, columns 5-7 to the "share" / "value_share"
# pair.
dat_plot %>%
  to_long(
    keys = c("based", "share"),
    values = c("value_based", "value_share"),
    names(.)[2:4],
    names(.)[5:7]
  )
year based value_based share value_share
1 1956 organizational_based 1 share_org_based 2
2 2000 organizational_based 0 share_org_based 92
3 1956 action_based 0 share_ideo_based 95
4 2000 action_based 0 share_ideo_based 87
5 1956 ideological_based 0 share_act_based 95
6 2000 ideological_based 0 share_act_based 91
下面是使用 tidyr 的另一种方式:
# Pivot the `share_*` columns first, then the remaining `*_based` columns.
# The two pivots yield every share/based combination per year, so keep only
# the rows whose two names agree on their first three characters.
dat_plot %>%
  pivot_longer(
    cols = starts_with("share"),
    names_to = "share",
    names_prefix = "share_",
    values_to = "share_value"
  ) %>%
  pivot_longer(
    cols = ends_with("based"),
    names_to = "based",
    names_pattern = "(.*)_based",
    values_to = "based_value"
  ) %>%
  filter(substr(share, 1, 3) == substr(based, 1, 3))
输出
# A tibble: 6 x 5
year share share_value based based_value
<dbl> <chr> <dbl> <chr> <dbl>
1 1956 org_based 2 organizational 1
2 1956 ideo_based 95 ideological 0
3 1956 act_based 95 action 0
4 2000 org_based 92 organizational 0
5 2000 ideo_based 87 ideological 0
6 2000 act_based 91 action 0
下面是另一种 tidyverse 方法(结果按 based 列的字母顺序排列):
library(dplyr)
library(tidyr)
library(stringr) # str_remove_all() comes from stringr, not dplyr/tidyr

# Reshape `dat_plot` into one row per year and category, with the `based`
# name/value and the matching `share` name/value side by side.
dat_plot %>%
  # One row per (year, original column name).
  pivot_longer(-year) %>%
  # Alphabetical order puts the three plain "*_based" names before the three
  # "share_*" names, so the rows fall into two equally sized halves.
  arrange(name) %>%
  mutate(name = str_remove_all(name, "share_")) %>%
  # Rows 1-6 (3 columns x 2 years) become group 1, rows 7-12 group 2; the
  # hard-coded 6 is specific to this example's dimensions.
  group_by(group = rep(row_number(), each = 6, length.out = n())) %>%
  # Only the plain "*_based" names (group 1) lose their "_based" suffix.
  mutate(name = ifelse(group == 1, str_remove_all(name, "_based"), name)) %>%
  pivot_wider(
    names_from = group,
    values_from = c(name, value),
    values_fn = list
  ) %>%
  unnest(cols = c(name_1, name_2, value_1, value_2)) %>%
  select(
    year,
    based = name_1,
    based_value = value_1,
    share = name_2,
    share_value = value_2
  )
year based based_value share share_value
<dbl> <chr> <dbl> <chr> <dbl>
1 1956 action 0 act_based 95
2 1956 ideological 0 ideo_based 95
3 1956 organizational 1 org_based 2
4 2000 action 0 act_based 91
5 2000 ideological 0 ideo_based 87
6 2000 organizational 0 org_based 92
我试图弄清楚如何在以下示例中使用 tidyr 中的 pivot_longer。原始表名为 dat_plot,其结构如下:
year organizational_based action_based ideological_based share_org_based share_ideo_based share_act_based
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1956 1 0 0 2 95 95
2 2000 0 0 0 92 87 91
也在这里:
# Example input in wide format: one row per year, three `*_based` columns
# and three `share_*_based` columns.
dat_plot <- structure(
  list(
    year = c(1956, 2000),
    organizational_based = c(1, 0),
    action_based = c(0, 0),
    ideological_based = c(0, 0),
    share_org_based = c(2, 92),
    share_ideo_based = c(95, 87),
    share_act_based = c(95, 91)
  ),
  row.names = c(NA, -2L),
  class = c("tbl_df", "tbl", "data.frame")
)
我想把它转换成如下所示的长格式:
year based based_value share share_value
1 1956 organizational 1 org_based 2
2 1956 action 0 ideo_based 95
3 1956 ideological 0 act_based 95
4 2000 organizational 0 org_based 92
5 2000 action 0 ideo_based 87
6 2000 ideological 0 act_based 91
或者,以 dput 的形式给出期望的结果:
# Desired long-format result: for every year, one row per pair of a
# `based` category and its corresponding `share` column.
solution <- structure(
  list(
    year = c(1956, 1956, 1956, 2000, 2000, 2000),
    based = c(
      "organizational", "action", "ideological",
      "organizational", "action", "ideological"
    ),
    based_value = c(1, 0, 0, 0, 0, 0),
    share = c(
      "org_based", "ideo_based", "act_based",
      "org_based", "ideo_based", "act_based"
    ),
    share_value = c(2, 95, 95, 92, 87, 91)
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)
我想我必须使用 names_pattern。下面是我尝试过的写法,但运行后你会发现,结果并不是我想要的:
# Attempt from the question. According to the surrounding text it does NOT
# produce the desired output; it is kept unchanged for reference.
# NOTE(review): the pattern requires the literal "share_", so column names
# that do not contain it (e.g. "organizational_based") cannot match.
# NOTE(review): per the tidyr documentation, `values_to` is ignored when
# `names_to` contains ".value" -- confirm against the installed version.
pivot_longer(data=dat_plot, cols=c("share_org_based", "share_ideo_based", "share_act_based",
"organizational_based", "action_based", "ideological_based"),
names_pattern = c("(share_[A-Za-z]+)([A-Za-z]+_based)"),
names_to = c("share", ".value"),
values_to = "value")
如果有人能说明 names_pattern 的工作原理,或指出我遗漏了什么,我将不胜感激。
另一个选择是使用 sjmisc 包中的 to_long:
library(sjmisc)

# Gather two groups of columns in a single call: columns 2-4 go to the
# "based" / "value_based" pair, columns 5-7 to the "share" / "value_share"
# pair.
dat_plot %>%
  to_long(
    keys = c("based", "share"),
    values = c("value_based", "value_share"),
    names(.)[2:4],
    names(.)[5:7]
  )
year based value_based share value_share
1 1956 organizational_based 1 share_org_based 2
2 2000 organizational_based 0 share_org_based 92
3 1956 action_based 0 share_ideo_based 95
4 2000 action_based 0 share_ideo_based 87
5 1956 ideological_based 0 share_act_based 95
6 2000 ideological_based 0 share_act_based 91
下面是使用 tidyr 的另一种方式:
# Pivot the `share_*` columns first, then the remaining `*_based` columns.
# The two pivots yield every share/based combination per year, so keep only
# the rows whose two names agree on their first three characters.
dat_plot %>%
  pivot_longer(
    cols = starts_with("share"),
    names_to = "share",
    names_prefix = "share_",
    values_to = "share_value"
  ) %>%
  pivot_longer(
    cols = ends_with("based"),
    names_to = "based",
    names_pattern = "(.*)_based",
    values_to = "based_value"
  ) %>%
  filter(substr(share, 1, 3) == substr(based, 1, 3))
输出
# A tibble: 6 x 5
year share share_value based based_value
<dbl> <chr> <dbl> <chr> <dbl>
1 1956 org_based 2 organizational 1
2 1956 ideo_based 95 ideological 0
3 1956 act_based 95 action 0
4 2000 org_based 92 organizational 0
5 2000 ideo_based 87 ideological 0
6 2000 act_based 91 action 0
下面是另一种 tidyverse 方法(结果按 based 列的字母顺序排列):
library(dplyr)
library(tidyr)
library(stringr) # str_remove_all() comes from stringr, not dplyr/tidyr

# Reshape `dat_plot` into one row per year and category, with the `based`
# name/value and the matching `share` name/value side by side.
dat_plot %>%
  # One row per (year, original column name).
  pivot_longer(-year) %>%
  # Alphabetical order puts the three plain "*_based" names before the three
  # "share_*" names, so the rows fall into two equally sized halves.
  arrange(name) %>%
  mutate(name = str_remove_all(name, "share_")) %>%
  # Rows 1-6 (3 columns x 2 years) become group 1, rows 7-12 group 2; the
  # hard-coded 6 is specific to this example's dimensions.
  group_by(group = rep(row_number(), each = 6, length.out = n())) %>%
  # Only the plain "*_based" names (group 1) lose their "_based" suffix.
  mutate(name = ifelse(group == 1, str_remove_all(name, "_based"), name)) %>%
  pivot_wider(
    names_from = group,
    values_from = c(name, value),
    values_fn = list
  ) %>%
  unnest(cols = c(name_1, name_2, value_1, value_2)) %>%
  select(
    year,
    based = name_1,
    based_value = value_1,
    share = name_2,
    share_value = value_2
  )
year based based_value share share_value
<dbl> <chr> <dbl> <chr> <dbl>
1 1956 action 0 act_based 95
2 1956 ideological 0 ideo_based 95
3 1956 organizational 1 org_based 2
4 2000 action 0 act_based 91
5 2000 ideological 0 ideo_based 87
6 2000 organizational 0 org_based 92