R:通过为特定元素创建列来重塑数据框(控制处理)
R: reshape a dataframe by creating columns for a specific element (control treatment)
考虑这样一个数据框:它记录了男性和女性各自在对照组(ctrl)和两种实验处理(low、high)下的结果(result),以及每种处理的大小(size):
# Load the tidyverse, which provides tibble() and the dplyr/tidyr verbs.
library(tidyverse)

# Example data: one row per treatment/gender combination.
#   treatment - "ctrl" (control) or an experimental level ("low", "high")
#   gender    - "male" or "female"
#   size      - size recorded for that treatment
#   result    - measured result for that treatment
# tibble() is used because data_frame() is deprecated in the tibble package.
mydf <- tibble(
  treatment = c("ctrl", "low", "high", "ctrl", "low", "high"),
  gender = c("male", "male", "male", "female", "female", "female"),
  size = c(10, 20, 30, 10, 20, 30),
  result = c(0.11, 0.32, 0.25, 0.15, 0.38, 0.55)
)
treatment gender size result
ctrl male 10 0.11
low male 20 0.32
high male 30 0.25
ctrl female 10 0.15
low female 20 0.38
high female 30 0.55
为了并排比较对照和实验处理,我想按如下方式重塑数据框:
treatment gender ctrl_size size ctrl_result result
low female 10 20 0.15 0.38
high female 10 30 0.15 0.55
low male 10 20 0.11 0.32
high male 10 30 0.11 0.25
我的以下尝试可行,但我觉得比较繁琐,因为它需要先创建两个辅助数据框,再把它们合并成最终的数据框:
# Three-step reshape that places each gender's control values next to the
# values of every experimental treatment.
# pivot_wider()/pivot_longer() are used instead of the superseded
# spread()/gather(); the final table is the same because the last step
# sorts by gender and c(low, high) fixes the treatment order within a gender.

# Step 1: control vs. treatment for the `result` column.
mydf_result <- mydf %>%
  select(-size) %>%
  # one row per gender, one column per treatment (ctrl, low, high)
  pivot_wider(names_from = treatment, values_from = result) %>%
  # back to one row per (gender, experimental treatment); ctrl stays a column
  pivot_longer(c(low, high), names_to = "treatment", values_to = "result") %>%
  rename(ctrl_result = ctrl)

# Step 2: the same reshape for the `size` column.
mydf_size <- mydf %>%
  select(-result) %>%
  pivot_wider(names_from = treatment, values_from = size) %>%
  pivot_longer(c(low, high), names_to = "treatment", values_to = "size") %>%
  rename(ctrl_size = ctrl)

# Step 3: combine both helper tables, order the columns, and sort by gender.
mydf_final <- mydf_result %>%
  full_join(mydf_size, by = c("treatment", "gender")) %>%
  select(treatment, gender, ctrl_size, size, ctrl_result, result) %>%
  arrange(gender)
# A tibble: 4 × 6
treatment gender ctrl_size size ctrl_result result
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 low female 10 20 0.15 0.38
2 high female 10 30 0.15 0.55
3 low male 10 20 0.11 0.32
4 high male 10 30 0.11 0.25
上述操作能否在单个管道(pipeline)内完成?
虽然我不确定您想要的结果是否是最整洁(tidy)的布局,但可以这样重塑数据:
library(tidyverse)
# Single-pipeline reshape that puts the control values beside each
# experimental treatment. pivot_longer()/pivot_wider() are used instead of
# the superseded gather()/spread().
mydf %>%
  # stack size and result into one value column
  pivot_longer(c(size, result), names_to = "var", values_to = "val") %>%
  # widen by treatment so ctrl becomes its own column
  pivot_wider(names_from = treatment, values_from = val) %>%
  # restack high and low; the ctrl value stays alongside each of them
  pivot_longer(c(high, low), names_to = "treatment", values_to = "ttmt") %>%
  # restack the numbers, now labelled as ctrl or ttmt
  pivot_longer(c(ctrl, ttmt), names_to = "ct_tm", values_to = "val") %>%
  # build column labels such as ctrl_size and ttmt_result
  unite(var, ct_tm, var) %>%
  # one column per label; names_sort = TRUE gives alphabetical column order
  pivot_wider(names_from = var, values_from = val, names_sort = TRUE) %>%
  # sort rows by the key columns, as spread() did implicitly
  arrange(gender, treatment)
## # A tibble: 4 × 6
## gender treatment ctrl_result ctrl_size ttmt_result ttmt_size
## * <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 female high 0.15 10 0.55 30
## 2 female low 0.15 10 0.38 20
## 3 male high 0.11 10 0.25 30
## 4 male low 0.11 10 0.32 20
这也可以通过 data.table 的连接(join)操作来完成:
library(data.table)
# Join every non-control row to the control row of the same gender.
# setDT() converts mydf to a data.table by reference, which is why the inner
# mydf[...] subset can use data.table syntax. Columns coming from the control
# rows carry the "i." prefix; they are renamed, and the redundant
# i.treatment column is dropped. The trailing [] prints the result.
setnames(
  setDT(mydf)[treatment != "ctrl"][
    mydf[treatment == "ctrl"],
    on = "gender"
  ],
  old = c("i.size", "i.result"),
  new = c("ctrl_size", "ctrl_result")
)[, i.treatment := NULL][]
# treatment gender size result ctrl_size ctrl_result
#1: low male 20 0.32 10 0.11
#2: high male 30 0.25 10 0.11
#3: low female 20 0.38 10 0.15
#4: high female 30 0.55 10 0.15
考虑这样一个数据框:它记录了男性和女性各自在对照组(ctrl)和两种实验处理(low、high)下的结果(result),以及每种处理的大小(size):
# Load the tidyverse, which provides tibble() and the dplyr/tidyr verbs.
library(tidyverse)

# Example data: one row per treatment/gender combination.
#   treatment - "ctrl" (control) or an experimental level ("low", "high")
#   gender    - "male" or "female"
#   size      - size recorded for that treatment
#   result    - measured result for that treatment
# tibble() is used because data_frame() is deprecated in the tibble package.
mydf <- tibble(
  treatment = c("ctrl", "low", "high", "ctrl", "low", "high"),
  gender = c("male", "male", "male", "female", "female", "female"),
  size = c(10, 20, 30, 10, 20, 30),
  result = c(0.11, 0.32, 0.25, 0.15, 0.38, 0.55)
)
treatment gender size result
ctrl male 10 0.11
low male 20 0.32
high male 30 0.25
ctrl female 10 0.15
low female 20 0.38
high female 30 0.55
为了并排比较对照和实验处理,我想按如下方式重塑数据框:
treatment gender ctrl_size size ctrl_result result
low female 10 20 0.15 0.38
high female 10 30 0.15 0.55
low male 10 20 0.11 0.32
high male 10 30 0.11 0.25
我的以下尝试可行,但我觉得比较繁琐,因为它需要先创建两个辅助数据框,再把它们合并成最终的数据框:
# Three-step reshape that places each gender's control values next to the
# values of every experimental treatment.
# pivot_wider()/pivot_longer() are used instead of the superseded
# spread()/gather(); the final table is the same because the last step
# sorts by gender and c(low, high) fixes the treatment order within a gender.

# Step 1: control vs. treatment for the `result` column.
mydf_result <- mydf %>%
  select(-size) %>%
  # one row per gender, one column per treatment (ctrl, low, high)
  pivot_wider(names_from = treatment, values_from = result) %>%
  # back to one row per (gender, experimental treatment); ctrl stays a column
  pivot_longer(c(low, high), names_to = "treatment", values_to = "result") %>%
  rename(ctrl_result = ctrl)

# Step 2: the same reshape for the `size` column.
mydf_size <- mydf %>%
  select(-result) %>%
  pivot_wider(names_from = treatment, values_from = size) %>%
  pivot_longer(c(low, high), names_to = "treatment", values_to = "size") %>%
  rename(ctrl_size = ctrl)

# Step 3: combine both helper tables, order the columns, and sort by gender.
mydf_final <- mydf_result %>%
  full_join(mydf_size, by = c("treatment", "gender")) %>%
  select(treatment, gender, ctrl_size, size, ctrl_result, result) %>%
  arrange(gender)
# A tibble: 4 × 6
treatment gender ctrl_size size ctrl_result result
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 low female 10 20 0.15 0.38
2 high female 10 30 0.15 0.55
3 low male 10 20 0.11 0.32
4 high male 10 30 0.11 0.25
上述操作能否在单个管道(pipeline)内完成?
虽然我不确定您想要的结果是否是最整洁(tidy)的布局,但可以这样重塑数据:
library(tidyverse)
# Single-pipeline reshape that puts the control values beside each
# experimental treatment. pivot_longer()/pivot_wider() are used instead of
# the superseded gather()/spread().
mydf %>%
  # stack size and result into one value column
  pivot_longer(c(size, result), names_to = "var", values_to = "val") %>%
  # widen by treatment so ctrl becomes its own column
  pivot_wider(names_from = treatment, values_from = val) %>%
  # restack high and low; the ctrl value stays alongside each of them
  pivot_longer(c(high, low), names_to = "treatment", values_to = "ttmt") %>%
  # restack the numbers, now labelled as ctrl or ttmt
  pivot_longer(c(ctrl, ttmt), names_to = "ct_tm", values_to = "val") %>%
  # build column labels such as ctrl_size and ttmt_result
  unite(var, ct_tm, var) %>%
  # one column per label; names_sort = TRUE gives alphabetical column order
  pivot_wider(names_from = var, values_from = val, names_sort = TRUE) %>%
  # sort rows by the key columns, as spread() did implicitly
  arrange(gender, treatment)
## # A tibble: 4 × 6
## gender treatment ctrl_result ctrl_size ttmt_result ttmt_size
## * <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 female high 0.15 10 0.55 30
## 2 female low 0.15 10 0.38 20
## 3 male high 0.11 10 0.25 30
## 4 male low 0.11 10 0.32 20
这也可以通过 data.table 的连接(join)操作来完成:
library(data.table)
# Join every non-control row to the control row of the same gender.
# setDT() converts mydf to a data.table by reference, which is why the inner
# mydf[...] subset can use data.table syntax. Columns coming from the control
# rows carry the "i." prefix; they are renamed, and the redundant
# i.treatment column is dropped. The trailing [] prints the result.
setnames(
  setDT(mydf)[treatment != "ctrl"][
    mydf[treatment == "ctrl"],
    on = "gender"
  ],
  old = c("i.size", "i.result"),
  new = c("ctrl_size", "ctrl_result")
)[, i.treatment := NULL][]
# treatment gender size result ctrl_size ctrl_result
#1: low male 20 0.32 10 0.11
#2: high male 30 0.25 10 0.11
#3: low female 20 0.38 10 0.15
#4: high female 30 0.55 10 0.15