使用复数 pivot_wider 来处理时间序列/纵向测量设计
Using a complex pivot_wider to deal with a time series / longitudinal measurement design
我的数据集中有三个变量(名称、age_group、parents_total)。 parents_total
测量了两次。现在,我想使用 pivot_wider
来保留 name
和 age_group
,但使用 parents_total
.
的 t2 结果“计算”一个新变量
在此示例中,age_group
可能会更改。如果可能的话,如果 age_group
保持不变,我想计算这个“t2”变量。
相关主题:
数据与代码
df <- structure(list(name = c("Arthur Henry Ra", "Laura Fernanda ",
"Gabriel Mistr<e3>o", "Gabriel Augusto", "Felipe Antonio ", "Ana Beatriz de ",
"Laura Fernanda ", "Pedro Henrique ", "Felipe de Melo ", "Gabriel Augusto",
"Seann Nichollas", "Gabriel Prata N", "Henrique Cesar ", "Rizotony Bogach",
"Henrique Cesar ", "Pedro Henrique ", "Jos<e9> Marques Go", "Jo<e3>o Guilherme ",
"Giovanna Marcia", "Jo<e3>o Pedro Silv", "Ana Luisa Santa", "Heitor Faria da",
"Pedro Domencian", "Heitor Faria da", "Jo<e3>o Guilherme ", "Jo<e3>o Pedro Silv",
"Melissa Aguila ", "Raphael Pescuma", "Jos<e9> Marques Go", "Maria Tereza Si",
"Larissa Fernand", "Pedro Cesar Rib", "Pedro Cesar Rib", "Nicollas Manuel",
"Melissa Aguila ", "Matheus Alexand", "Arthur Henry Ra", "Seann Nichollas",
"Lucas de Lucca ", "Matheus Alexand", "Felipe Antonio ", "Giovanna Marcia",
"Gisela Brand<e3>o ", "Felipe de Melo ", "Gabriel Mistr<e3>o",
"Leticia Silva M", "Larissa Fernand", "Nicollas Manuel", "Isabela Carolin",
"Giovanna Marcia", "Isabela Carolin", "Ana Luisa Santa", "Gisela Brand<e3>o ",
"Jo<e3>o Vitor Lope", "Gabriel Augusto", "Vitor Pontes Ra", "Gabriel Augusto",
"Tainara da Silv", "Larissa Fernand", "Ana Beatriz de ", "Vitor Pontes Ra",
"Matheus Alexand", "Gabriel Prata N", "Luis Augusto Sc", "Larissa Fernand",
"Pedro Domencian", "Raphael Pescuma", "Rizotony Bogach", "Maria Tereza Si",
"Lucas de Lucca ", "Jo<e3>o Pedro Silv", "Luis Augusto Sc", "Jo<e3>o Vitor Lope",
"Tainara da Silv", "Leticia Silva M", "Henrique Cesar "), age_group = structure(c(2L,
5L, 5L, 4L, 1L, 3L, 4L, 5L, 2L, 3L, 2L, 3L, 4L, 5L, 3L, 2L, 6L,
3L, 2L, 2L, 5L, 2L, 7L, 2L, 3L, 3L, 7L, 3L, 5L, 4L, 5L, 3L, 3L,
4L, 4L, 4L, NA, 3L, 2L, 7L, 2L, 2L, 7L, 3L, 5L, 3L, 7L, 4L, 5L,
3L, 5L, 6L, 6L, 2L, 3L, 2L, 4L, 5L, 5L, 2L, 2L, 5L, 3L, 2L, 4L,
7L, 2L, 4L, 4L, 1L, 3L, 3L, 2L, 5L, 4L, 3L), .Label = c("[5,6]",
"(6,8]", "(8,10]", "(10,12]", "(12,14]", "(14,16]", "(16,18]",
"(18,Inf]"), class = "factor"), parents_total = c(177, 178, 91,
128, 138, 146, 185, 177, 97, 79, 167, 160, 96, 160, 138, 165,
86, 183, 94, 118, 109, 144, 114, 132, 169, 106, 21, 131, 137,
123, 84, 190, 186, 162, 103, 180, 172, 135, 129, 189, 122, 115,
122, 66, 96, 151, 95, 179, 196, 122, 182, 112, 147, 133, 125,
152, 112, 162, 168, 100, 110, 189, 136, 193, 140, 131, 117, 186,
98, 158, 103, 94, 147, 148, 144, 156)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -76L))
library(dplyr)
dat %>%
group_by(name, age_group) %>%
mutate(tn = paste0("parents_total_t", row_number())) %>%
pivot_wider(c(name, age_group), names_from = tn, values_from = parents_total) %>%
ungroup()
# # A tibble: 58 x 4
# name age_group parents_total_t1 parents_total_t2
# <chr> <fct> <dbl> <dbl>
# 1 "Arthur Henry Ra" (6,8] 177 NA
# 2 "Laura Fernanda " (12,14] 178 NA
# 3 "Gabriel Mistr<e3>o" (12,14] 91 96
# 4 "Gabriel Augusto" (10,12] 128 112
# 5 "Felipe Antonio " [5,6] 138 NA
# 6 "Ana Beatriz de " (8,10] 146 NA
# 7 "Laura Fernanda " (10,12] 185 NA
# 8 "Pedro Henrique " (12,14] 177 NA
# 9 "Felipe de Melo " (6,8] 97 NA
# 10 "Gabriel Augusto" (8,10] 79 125
# # ... with 48 more rows
这是一个没有旋转的解决方案。
数据的格局先建议给arrange
然后 mutate
使用 lead
的新列
library(dplyr)
df %>%
arrange(name, parents_total) %>%
group_by(name) %>%
mutate(parents_total2 = lead(parents_total),
parents_total = ifelse(parents_total == last(parents_total), NA, parents_total))
name age_group parents_total parents_total2
<chr> <fct> <dbl> <dbl>
1 "Ana Beatriz de " (6,8] 100 146
2 "Ana Beatriz de " (8,10] NA NA
3 "Ana Luisa Santa" (12,14] 109 112
4 "Ana Luisa Santa" (14,16] NA NA
5 "Arthur Henry Ra" NA 172 177
6 "Arthur Henry Ra" (6,8] NA NA
7 "Felipe Antonio " (6,8] 122 138
8 "Felipe Antonio " [5,6] NA NA
9 "Felipe de Melo " (8,10] 66 97
10 "Felipe de Melo " (6,8] NA NA
# ... with 66 more rows
我的数据集中有三个变量(名称、age_group、parents_total)。 parents_total
测量了两次。现在,我想使用 pivot_wider
来保留 name
和 age_group
,但使用 parents_total
.
在此示例中,age_group
可能会更改。如果可能的话,如果 age_group
保持不变,我想计算这个“t2”变量。
相关主题:
数据与代码
df <- structure(list(name = c("Arthur Henry Ra", "Laura Fernanda ",
"Gabriel Mistr<e3>o", "Gabriel Augusto", "Felipe Antonio ", "Ana Beatriz de ",
"Laura Fernanda ", "Pedro Henrique ", "Felipe de Melo ", "Gabriel Augusto",
"Seann Nichollas", "Gabriel Prata N", "Henrique Cesar ", "Rizotony Bogach",
"Henrique Cesar ", "Pedro Henrique ", "Jos<e9> Marques Go", "Jo<e3>o Guilherme ",
"Giovanna Marcia", "Jo<e3>o Pedro Silv", "Ana Luisa Santa", "Heitor Faria da",
"Pedro Domencian", "Heitor Faria da", "Jo<e3>o Guilherme ", "Jo<e3>o Pedro Silv",
"Melissa Aguila ", "Raphael Pescuma", "Jos<e9> Marques Go", "Maria Tereza Si",
"Larissa Fernand", "Pedro Cesar Rib", "Pedro Cesar Rib", "Nicollas Manuel",
"Melissa Aguila ", "Matheus Alexand", "Arthur Henry Ra", "Seann Nichollas",
"Lucas de Lucca ", "Matheus Alexand", "Felipe Antonio ", "Giovanna Marcia",
"Gisela Brand<e3>o ", "Felipe de Melo ", "Gabriel Mistr<e3>o",
"Leticia Silva M", "Larissa Fernand", "Nicollas Manuel", "Isabela Carolin",
"Giovanna Marcia", "Isabela Carolin", "Ana Luisa Santa", "Gisela Brand<e3>o ",
"Jo<e3>o Vitor Lope", "Gabriel Augusto", "Vitor Pontes Ra", "Gabriel Augusto",
"Tainara da Silv", "Larissa Fernand", "Ana Beatriz de ", "Vitor Pontes Ra",
"Matheus Alexand", "Gabriel Prata N", "Luis Augusto Sc", "Larissa Fernand",
"Pedro Domencian", "Raphael Pescuma", "Rizotony Bogach", "Maria Tereza Si",
"Lucas de Lucca ", "Jo<e3>o Pedro Silv", "Luis Augusto Sc", "Jo<e3>o Vitor Lope",
"Tainara da Silv", "Leticia Silva M", "Henrique Cesar "), age_group = structure(c(2L,
5L, 5L, 4L, 1L, 3L, 4L, 5L, 2L, 3L, 2L, 3L, 4L, 5L, 3L, 2L, 6L,
3L, 2L, 2L, 5L, 2L, 7L, 2L, 3L, 3L, 7L, 3L, 5L, 4L, 5L, 3L, 3L,
4L, 4L, 4L, NA, 3L, 2L, 7L, 2L, 2L, 7L, 3L, 5L, 3L, 7L, 4L, 5L,
3L, 5L, 6L, 6L, 2L, 3L, 2L, 4L, 5L, 5L, 2L, 2L, 5L, 3L, 2L, 4L,
7L, 2L, 4L, 4L, 1L, 3L, 3L, 2L, 5L, 4L, 3L), .Label = c("[5,6]",
"(6,8]", "(8,10]", "(10,12]", "(12,14]", "(14,16]", "(16,18]",
"(18,Inf]"), class = "factor"), parents_total = c(177, 178, 91,
128, 138, 146, 185, 177, 97, 79, 167, 160, 96, 160, 138, 165,
86, 183, 94, 118, 109, 144, 114, 132, 169, 106, 21, 131, 137,
123, 84, 190, 186, 162, 103, 180, 172, 135, 129, 189, 122, 115,
122, 66, 96, 151, 95, 179, 196, 122, 182, 112, 147, 133, 125,
152, 112, 162, 168, 100, 110, 189, 136, 193, 140, 131, 117, 186,
98, 158, 103, 94, 147, 148, 144, 156)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -76L))
library(dplyr)
dat %>%
group_by(name, age_group) %>%
mutate(tn = paste0("parents_total_t", row_number())) %>%
pivot_wider(c(name, age_group), names_from = tn, values_from = parents_total) %>%
ungroup()
# # A tibble: 58 x 4
# name age_group parents_total_t1 parents_total_t2
# <chr> <fct> <dbl> <dbl>
# 1 "Arthur Henry Ra" (6,8] 177 NA
# 2 "Laura Fernanda " (12,14] 178 NA
# 3 "Gabriel Mistr<e3>o" (12,14] 91 96
# 4 "Gabriel Augusto" (10,12] 128 112
# 5 "Felipe Antonio " [5,6] 138 NA
# 6 "Ana Beatriz de " (8,10] 146 NA
# 7 "Laura Fernanda " (10,12] 185 NA
# 8 "Pedro Henrique " (12,14] 177 NA
# 9 "Felipe de Melo " (6,8] 97 NA
# 10 "Gabriel Augusto" (8,10] 79 125
# # ... with 48 more rows
这是一个没有旋转的解决方案。
数据的格局先建议给arrange
然后 mutate
使用 lead
library(dplyr)
df %>%
arrange(name, parents_total) %>%
group_by(name) %>%
mutate(parents_total2 = lead(parents_total),
parents_total = ifelse(parents_total == last(parents_total), NA, parents_total))
name age_group parents_total parents_total2
<chr> <fct> <dbl> <dbl>
1 "Ana Beatriz de " (6,8] 100 146
2 "Ana Beatriz de " (8,10] NA NA
3 "Ana Luisa Santa" (12,14] 109 112
4 "Ana Luisa Santa" (14,16] NA NA
5 "Arthur Henry Ra" NA 172 177
6 "Arthur Henry Ra" (6,8] NA NA
7 "Felipe Antonio " (6,8] 122 138
8 "Felipe Antonio " [5,6] NA NA
9 "Felipe de Melo " (8,10] 66 97
10 "Felipe de Melo " (6,8] NA NA
# ... with 66 more rows