重塑数据集中的多个变量
reshape multiple variables in a dataset
我想将以下数据集重塑为长格式。我在 reshape 中尝试了 melt functino,但我不知道如何将 age1、age2、age3 更改为一列。谁能帮忙?谢谢。
id <- c(1,2,3,4,5)
age1 <- c(11,11,11,11,11)
age2 <- age1+2
age3 <- age2+2
ht1 <- c(120,130,125,121,130)
ht2 <- ht1 + 20
ht3 <- ht2 + 20
bmi1 <- c(18,19,17,18,18)
bmi2 <- c(20,18,19,21,24)
bmi3 <- c(21,21,21,24,27)
df <- data.frame(id=id,age1=age1,age2=age2,age3=age3,ht1=ht1,ht2=ht2,ht3=ht3,bmi1=bmi1,bmi2=bmi2,bmi3=bmi3)
来自
id sex age1 age2 age3 ht1 ht2 ht3 bmi1 bmi2 bmi3
1 1 M 11 13 15 120 140 160 18 20 21
2 2 F 11 13 15 130 150 170 19 18 21
3 3 M 11 13 15 125 145 165 17 19 21
4 4 F 11 13 15 121 141 161 18 21 24
5 5 M 11 13 15 130 150 170 18 24 27
像这样
id sex age ht bmi
1 M 11 120 18
1 M 13 140 20
1 M 15 160 21
2 F 11 130 19
2 F 13 150 18
2 F 15 170 21
3 M 11 165 17
...
我们可以用 data.table
中的 melt
来做到这一点,它可以在 measure
参数中使用多个 patterns
library(data.table)
melt(setDT(df), measure = patterns("^age", "^ht", "^bmi"),
value.name = c("age", "ht", "bmi"))[, variable := NULL][]
# id sex age ht bmi
# 1: 1 M 11 120 18
# 2: 2 F 11 130 19
# 3: 3 M 11 125 17
# 4: 4 F 11 121 18
# 5: 5 M 11 130 18
# 6: 1 M 13 140 20
# 7: 2 F 13 150 18
# 8: 3 M 13 145 19
# 9: 4 F 13 141 21
#10: 5 M 13 150 24
#11: 1 M 15 160 21
#12: 2 F 15 170 21
#13: 3 M 15 165 21
#14: 4 F 15 161 24
#15: 5 M 15 170 27
使用 reshape
你可以:
v <- c('age', 'ht', 'bmi')
reshape(df, dir = 'long', varying = lapply(v, grep, names(df)), v.names = v)
# id time age ht bmi
# 1.1 1 1 11 120 18
# 2.1 2 1 11 130 19
# 3.1 3 1 11 125 17
# 4.1 4 1 11 121 18
# 5.1 5 1 11 130 18
# 1.2 1 2 13 140 20
# 2.2 2 2 13 150 18
# 3.2 3 2 13 145 19
# 4.2 4 2 13 141 21
# 5.2 5 2 13 150 24
# 1.3 1 3 15 160 21
# 2.3 2 3 15 170 21
# 3.3 3 3 15 165 21
# 4.3 4 3 15 161 24
# 5.3 5 3 15 170 27
与dplyr-tidyr
:
library(dplyr)
library(tidyr)
df %>%
gather(key, value, -id) %>%
extract(key, c("var", "num"), "(age|ht|bmi)([0-9]+)") %>%
spread(var, value)
输出:
id number age bmi ht
1 1 1 11 18 120
2 1 2 13 20 140
3 1 3 15 21 160
4 2 1 11 19 130
5 2 2 13 18 150
6 2 3 15 21 170
7 3 1 11 17 125
8 3 2 13 19 145
9 3 3 15 21 165
10 4 1 11 18 121
11 4 2 13 21 141
12 4 3 15 24 161
13 5 1 11 18 130
14 5 2 13 24 150
15 5 3 15 27 170
我想将以下数据集重塑为长格式。我在 reshape 中尝试了 melt functino,但我不知道如何将 age1、age2、age3 更改为一列。谁能帮忙?谢谢。
id <- c(1,2,3,4,5)
age1 <- c(11,11,11,11,11)
age2 <- age1+2
age3 <- age2+2
ht1 <- c(120,130,125,121,130)
ht2 <- ht1 + 20
ht3 <- ht2 + 20
bmi1 <- c(18,19,17,18,18)
bmi2 <- c(20,18,19,21,24)
bmi3 <- c(21,21,21,24,27)
df <- data.frame(id=id,age1=age1,age2=age2,age3=age3,ht1=ht1,ht2=ht2,ht3=ht3,bmi1=bmi1,bmi2=bmi2,bmi3=bmi3)
来自
id sex age1 age2 age3 ht1 ht2 ht3 bmi1 bmi2 bmi3
1 1 M 11 13 15 120 140 160 18 20 21
2 2 F 11 13 15 130 150 170 19 18 21
3 3 M 11 13 15 125 145 165 17 19 21
4 4 F 11 13 15 121 141 161 18 21 24
5 5 M 11 13 15 130 150 170 18 24 27
像这样
id sex age ht bmi
1 M 11 120 18
1 M 13 140 20
1 M 15 160 21
2 F 11 130 19
2 F 13 150 18
2 F 15 170 21
3 M 11 165 17
...
我们可以用 data.table
中的 melt
来做到这一点,它可以在 measure
参数中使用多个 patterns
library(data.table)
melt(setDT(df), measure = patterns("^age", "^ht", "^bmi"),
value.name = c("age", "ht", "bmi"))[, variable := NULL][]
# id sex age ht bmi
# 1: 1 M 11 120 18
# 2: 2 F 11 130 19
# 3: 3 M 11 125 17
# 4: 4 F 11 121 18
# 5: 5 M 11 130 18
# 6: 1 M 13 140 20
# 7: 2 F 13 150 18
# 8: 3 M 13 145 19
# 9: 4 F 13 141 21
#10: 5 M 13 150 24
#11: 1 M 15 160 21
#12: 2 F 15 170 21
#13: 3 M 15 165 21
#14: 4 F 15 161 24
#15: 5 M 15 170 27
使用 reshape
你可以:
v <- c('age', 'ht', 'bmi')
reshape(df, dir = 'long', varying = lapply(v, grep, names(df)), v.names = v)
# id time age ht bmi
# 1.1 1 1 11 120 18
# 2.1 2 1 11 130 19
# 3.1 3 1 11 125 17
# 4.1 4 1 11 121 18
# 5.1 5 1 11 130 18
# 1.2 1 2 13 140 20
# 2.2 2 2 13 150 18
# 3.2 3 2 13 145 19
# 4.2 4 2 13 141 21
# 5.2 5 2 13 150 24
# 1.3 1 3 15 160 21
# 2.3 2 3 15 170 21
# 3.3 3 3 15 165 21
# 4.3 4 3 15 161 24
# 5.3 5 3 15 170 27
与dplyr-tidyr
:
library(dplyr)
library(tidyr)
df %>%
gather(key, value, -id) %>%
extract(key, c("var", "num"), "(age|ht|bmi)([0-9]+)") %>%
spread(var, value)
输出:
id number age bmi ht
1 1 1 11 18 120
2 1 2 13 20 140
3 1 3 15 21 160
4 2 1 11 19 130
5 2 2 13 18 150
6 2 3 15 21 170
7 3 1 11 17 125
8 3 2 13 19 145
9 3 3 15 21 165
10 4 1 11 18 121
11 4 2 13 21 141
12 4 3 15 24 161
13 5 1 11 18 130
14 5 2 13 24 150
15 5 3 15 27 170