按 ID 重塑数据框
Reshape dataframe by ID
我有一个数据集
id age edu blood
1 30-39 Primary 5.5
1 20-29 Secondary 8.7
1 30-39 Primary 10
2 30-39 Primary 11
2 20-29 Secondary 10
2 20-29 Secondary 9
我想要按 id 汇总的输出,如下所示:
id age30_39count age20_29count edu_pri_count edu_sec_count blood_median
1 2 1 2 1 8.7
2 1 2 1 2 10
我试过R代码:
library(dplyr)
library(tidyr)
ddply(dat, "id", spread, age, age, edu, edu, blood, blood_median=median(blood))
但它没有显示预期的结果。有人可以帮忙吗?
你的意思是这样的?
> library(dplyr)
> library(tidyr)
> group_by(df,id,age) %>% gather(variable,value,age,edu) %>%
unite(tag,variable,value) %>%
mutate(medblood=median(blood)) %>%
spread(tag,id) %>% select(-blood) %>%
select(-medblood,medblood)
# A tibble: 6 x 5
`age_20-29` `age_30-39` edu_Primary edu_Secondary medblood
<int> <int> <int> <int> <dbl>
1 NA 1 1 NA 8.70
2 1 NA NA 1 8.70
3 2 NA NA 2 10.0
4 NA 1 1 NA 8.70
5 2 NA NA 2 10.0
6 NA 2 2 NA 10.0
最后的 select(-medblood,medblood) 用于将 blood 的中位数列(medblood)移到最右边。不过,您可能想要的是这样:
> group_by(df,id,age) %>% gather(variable,value,age,edu) %>%
unite(tag,variable,value) %>%
mutate(medblood=median(blood)) %>%
count(medblood,id,tag) %>% spread(tag,n)
# A tibble: 2 x 6
# Groups: id [2]
id medblood `age_20-29` `age_30-39` edu_Primary edu_Secondary
<int> <dbl> <int> <int> <int> <int>
1 1 8.70 1 2 2 1
2 2 10.0 2 1 1 2
这是本示例所用数据 df 的 dput 输出:
> dput(df)
structure(list(id = c(1L, 1L, 1L, 2L, 2L, 2L), age = structure(c(2L,
1L, 2L, 2L, 1L, 1L), .Label = c("20-29", "30-39"), class = "factor"),
edu = structure(c(1L, 2L, 1L, 1L, 2L, 2L), .Label = c("Primary",
"Secondary"), class = "factor"), blood = c(5.5, 8.7, 10,
11, 10, 9)), .Names = c("id", "age", "edu", "blood"), class = "data.frame", row.names = c(NA,
-6L))
我有一个数据集
id age edu blood
1 30-39 Primary 5.5
1 20-29 Secondary 8.7
1 30-39 Primary 10
2 30-39 Primary 11
2 20-29 Secondary 10
2 20-29 Secondary 9
我想要按 id 汇总的输出,如下所示:
id age30_39count age20_29count edu_pri_count edu_sec_count blood_median
1 2 1 2 1 8.7
2 1 2 1 2 10
我试过R代码:
library(dplyr)
library(tidyr)
ddply(dat, "id", spread, age, age, edu, edu, blood, blood_median=median(blood))
但它没有显示预期的结果。有人可以帮忙吗?
你的意思是这样的?
> library(dplyr)
> library(tidyr)
> group_by(df,id,age) %>% gather(variable,value,age,edu) %>%
unite(tag,variable,value) %>%
mutate(medblood=median(blood)) %>%
spread(tag,id) %>% select(-blood) %>%
select(-medblood,medblood)
# A tibble: 6 x 5
`age_20-29` `age_30-39` edu_Primary edu_Secondary medblood
<int> <int> <int> <int> <dbl>
1 NA 1 1 NA 8.70
2 1 NA NA 1 8.70
3 2 NA NA 2 10.0
4 NA 1 1 NA 8.70
5 2 NA NA 2 10.0
6 NA 2 2 NA 10.0
最后的 select(-medblood,medblood) 用于将 blood 的中位数列(medblood)移到最右边。不过,您可能想要的是这样:
> group_by(df,id,age) %>% gather(variable,value,age,edu) %>%
unite(tag,variable,value) %>%
mutate(medblood=median(blood)) %>%
count(medblood,id,tag) %>% spread(tag,n)
# A tibble: 2 x 6
# Groups: id [2]
id medblood `age_20-29` `age_30-39` edu_Primary edu_Secondary
<int> <dbl> <int> <int> <int> <int>
1 1 8.70 1 2 2 1
2 2 10.0 2 1 1 2
这是本示例所用数据 df 的 dput 输出:
> dput(df)
structure(list(id = c(1L, 1L, 1L, 2L, 2L, 2L), age = structure(c(2L,
1L, 2L, 2L, 1L, 1L), .Label = c("20-29", "30-39"), class = "factor"),
edu = structure(c(1L, 2L, 1L, 1L, 2L, 2L), .Label = c("Primary",
"Secondary"), class = "factor"), blood = c(5.5, 8.7, 10,
11, 10, 9)), .Names = c("id", "age", "edu", "blood"), class = "data.frame", row.names = c(NA,
-6L))