重塑前测-后测(pre-post)数据
Reshaping pre-post data
我有一个数据集
# Wide-format example data: one row per participant. Each score appears twice,
# with a ".x" suffix (reported as "pre" further down) and a ".y" suffix
# (reported as "post"). The column names are written in the syntactic form
# that data.frame() produces for names starting with a digit (an "X" prefix).
df <- data.frame(
  ID = c("sue_1", "bob_2", "nick_3", "joe_4"),
  X1_confidence.x = c(3, 3, 1, 5),
  X2_reading.x = c(4, 3, 2, 5),
  X3_maths.x = c(3, 2, 4, 2),
  X1_confidence.y = c(3, 2, 3, 4),
  X2_reading.y = c(3, 4, 2, 1),
  X3_maths.y = c(3, 4, 2, 5)
)
得到如下 df:
> df
ID X1_confidence.x X2_reading.x X3_maths.x X1_confidence.y X2_reading.y X3_maths.y
1 sue_1 3 4 3 3 3 3
2 bob_2 3 3 2 2 4 4
3 nick_3 1 2 4 3 2 2
4 joe_4 5 5 2 4 1 5
我希望它变成这种格式:
ID Test X1_confidence X2_reading X3_maths
1 sue_1 pre 3 4 3
2 sue_1 post 3 3 3
3 bob_2 pre 3 3 2
4 bob_2 post 2 4 4
5 nick_3 pre 1 2 4
6 nick_3 post 3 2 2
7 joe_4 pre 5 5 2
8 joe_4 post 4 1 5
我尝试过 reshape 和 gather,但似乎无法弄清楚...
这应该可以解决问题:
# Stack every ".x"/".y" column pair into a single long column per measure.
#   varying - the column pairs to stack (first element ".x", second ".y")
#   v.names - names of the stacked columns, in the same order as `varying`
#   times   - labels written to the `Test` column for the first and second
#             element of each pair
df_long <- reshape(
  df,
  direction = "long",
  idvar = "ID",
  timevar = "Test",
  times = c("pre", "post"),
  v.names = c("X1_confidence", "X2_reading", "X3_maths"),
  varying = list(
    c("X1_confidence.x", "X1_confidence.y"),
    c("X2_reading.x", "X2_reading.y"),
    c("X3_maths.x", "X3_maths.y")
  )
)
然后按ID排序:
# Sort by ID in descending order so that the rows of each participant sit next
# to each other. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
df_long <- df_long[order(df_long$ID, decreasing = TRUE), ]
应该有更 "direct"(直接)的方法,只用 pivot_longer
就能做到这一点,但我没能找到正确的参数。下面是在 tidyr 1.0.0
中结合使用 pivot_longer
和 pivot_wider
的一种方法
library(dplyr)
library(tidyr)
# Reshape the wide pre/post columns into one row per ID and Test.
#   1. Stack all score columns into `key` (column name) and `value`.
#   2. Derive `Test` from the column suffix (".x" -> "pre", otherwise "post")
#      instead of relying on row order, then strip the suffix from `key`.
#      `Test` must be computed first because it needs the unstripped name.
#   3. Spread the measures back out into one column each.
# The dot in the regular expressions is escaped as "\\." because "\." is not a
# valid escape sequence in an R string.
df %>%
  pivot_longer(cols = starts_with("X"), names_to = "key") %>%
  mutate(
    Test = ifelse(grepl("\\.x$", key), "pre", "post"),
    key = sub("\\.x$|\\.y$", "", key)
  ) %>%
  # Arguments are named so the call does not depend on the positional order
  # of pivot_wider()'s parameters.
  pivot_wider(id_cols = c(ID, Test), names_from = key, values_from = value)
# ID Test X1_confidence X2_reading X3_maths
# <fct> <chr> <dbl> <dbl> <dbl>
#1 sue_1 pre 3 4 3
#2 sue_1 post 3 3 3
#3 bob_2 pre 3 3 2
#4 bob_2 post 2 4 4
#5 nick_3 pre 1 2 4
#6 nick_3 post 3 2 2
#7 joe_4 pre 5 5 2
#8 joe_4 post 4 1 5
如果您的 tidyr
尚未更新,也可以使用 gather
和 spread
达到同样的效果
# Same reshaping with the older gather()/spread() API.
#   1. Stack every column except ID into `key` (column name) and `value`.
#   2. Derive `Test` from the column suffix (".x" -> "pre", otherwise "post"),
#      then strip the suffix from `key`. `Test` must be computed first because
#      it needs the unstripped name. No grouping is needed, which avoids
#      assigning a length-2 vector to groups of a different size.
#   3. Spread the measures back out into one column each.
# The dot in the regular expressions is escaped as "\\." because "\." is not a
# valid escape sequence in an R string.
df %>%
  gather(key, value, -ID) %>%
  mutate(
    Test = ifelse(grepl("\\.x$", key), "pre", "post"),
    key = sub("\\.x$|\\.y$", "", key)
  ) %>%
  spread(key, value)
我有一个数据集
# Wide-format example data: one row per participant. Each score appears twice,
# with a ".x" suffix (reported as "pre" further down) and a ".y" suffix
# (reported as "post"). The column names are written in the syntactic form
# that data.frame() produces for names starting with a digit (an "X" prefix).
df <- data.frame(
  ID = c("sue_1", "bob_2", "nick_3", "joe_4"),
  X1_confidence.x = c(3, 3, 1, 5),
  X2_reading.x = c(4, 3, 2, 5),
  X3_maths.x = c(3, 2, 4, 2),
  X1_confidence.y = c(3, 2, 3, 4),
  X2_reading.y = c(3, 4, 2, 1),
  X3_maths.y = c(3, 4, 2, 5)
)
得到如下 df:
> df
ID X1_confidence.x X2_reading.x X3_maths.x X1_confidence.y X2_reading.y X3_maths.y
1 sue_1 3 4 3 3 3 3
2 bob_2 3 3 2 2 4 4
3 nick_3 1 2 4 3 2 2
4 joe_4 5 5 2 4 1 5
我希望它变成这种格式:
ID Test X1_confidence X2_reading X3_maths
1 sue_1 pre 3 4 3
2 sue_1 post 3 3 3
3 bob_2 pre 3 3 2
4 bob_2 post 2 4 4
5 nick_3 pre 1 2 4
6 nick_3 post 3 2 2
7 joe_4 pre 5 5 2
8 joe_4 post 4 1 5
我尝试过 reshape 和 gather,但似乎无法弄清楚...
这应该可以解决问题:
# Stack every ".x"/".y" column pair into a single long column per measure.
#   varying - the column pairs to stack (first element ".x", second ".y")
#   v.names - names of the stacked columns, in the same order as `varying`
#   times   - labels written to the `Test` column for the first and second
#             element of each pair
df_long <- reshape(
  df,
  direction = "long",
  idvar = "ID",
  timevar = "Test",
  times = c("pre", "post"),
  v.names = c("X1_confidence", "X2_reading", "X3_maths"),
  varying = list(
    c("X1_confidence.x", "X1_confidence.y"),
    c("X2_reading.x", "X2_reading.y"),
    c("X3_maths.x", "X3_maths.y")
  )
)
然后按ID排序:
# Sort by ID in descending order so that the rows of each participant sit next
# to each other. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
df_long <- df_long[order(df_long$ID, decreasing = TRUE), ]
应该有更 "direct"(直接)的方法,只用 pivot_longer
就能做到这一点,但我没能找到正确的参数。下面是在 tidyr 1.0.0
中结合使用 pivot_longer
和 pivot_wider
的一种方法
library(dplyr)
library(tidyr)
# Reshape the wide pre/post columns into one row per ID and Test.
#   1. Stack all score columns into `key` (column name) and `value`.
#   2. Derive `Test` from the column suffix (".x" -> "pre", otherwise "post")
#      instead of relying on row order, then strip the suffix from `key`.
#      `Test` must be computed first because it needs the unstripped name.
#   3. Spread the measures back out into one column each.
# The dot in the regular expressions is escaped as "\\." because "\." is not a
# valid escape sequence in an R string.
df %>%
  pivot_longer(cols = starts_with("X"), names_to = "key") %>%
  mutate(
    Test = ifelse(grepl("\\.x$", key), "pre", "post"),
    key = sub("\\.x$|\\.y$", "", key)
  ) %>%
  # Arguments are named so the call does not depend on the positional order
  # of pivot_wider()'s parameters.
  pivot_wider(id_cols = c(ID, Test), names_from = key, values_from = value)
# ID Test X1_confidence X2_reading X3_maths
# <fct> <chr> <dbl> <dbl> <dbl>
#1 sue_1 pre 3 4 3
#2 sue_1 post 3 3 3
#3 bob_2 pre 3 3 2
#4 bob_2 post 2 4 4
#5 nick_3 pre 1 2 4
#6 nick_3 post 3 2 2
#7 joe_4 pre 5 5 2
#8 joe_4 post 4 1 5
如果您的 tidyr
尚未更新,也可以使用 gather
和 spread 达到同样的效果
# Same reshaping with the older gather()/spread() API.
#   1. Stack every column except ID into `key` (column name) and `value`.
#   2. Derive `Test` from the column suffix (".x" -> "pre", otherwise "post"),
#      then strip the suffix from `key`. `Test` must be computed first because
#      it needs the unstripped name. No grouping is needed, which avoids
#      assigning a length-2 vector to groups of a different size.
#   3. Spread the measures back out into one column each.
# The dot in the regular expressions is escaped as "\\." because "\." is not a
# valid escape sequence in an R string.
df %>%
  gather(key, value, -ID) %>%
  mutate(
    Test = ifelse(grepl("\\.x$", key), "pre", "post"),
    key = sub("\\.x$|\\.y$", "", key)
  ) %>%
  spread(key, value)