R:通过为特定元素(对照处理)创建列来重塑数据框

R: reshape a dataframe by creating columns for a specific element (control treatment)

考虑这样一个数据框:它分别针对男性和女性,给出对照组(ctrl)和两种实验处理(low、high)的结果(result)以及每种处理的大小(size):

library(tidyverse)
# Example data: one row per treatment x gender combination, carrying the
# treatment size and the measured result. tibble() replaces the deprecated
# data_frame() constructor and builds the same object.
mydf <- tibble(
  treatment = c("ctrl", "low", "high", "ctrl", "low", "high"),
  gender = c("male", "male", "male", "female", "female", "female"),
  size = c(10, 20, 30, 10, 20, 30),
  result = c(0.11, 0.32, 0.25, 0.15, 0.38, 0.55)
)

treatment  gender   size  result
 ctrl       male     10    0.11
 low        male     20    0.32
 high       male     30    0.25
 ctrl       female   10    0.15
 low        female   20    0.38
 high       female   30    0.55

为了并排比较对照和实验处理,我想按如下方式重塑数据框:

treatment gender   ctrl_size    size   ctrl_result  result
   low     female        10      20        0.15      0.38
  high     female        10      30        0.15      0.55
   low     male          10      20        0.11      0.32
  high     male          10      30        0.11      0.25

我的以下尝试是有效的,但在我看来比较繁琐,因为它先创建了两个辅助数据框,然后才把它们合并成最终的数据框:

# Results: one column per treatment, label the control column, then fold the
# low/high columns back into rows so each row pairs a treatment with ctrl.
mydf_result <- mydf %>%
  select(treatment, gender, result) %>%
  spread(key = treatment, value = result) %>%
  rename(ctrl_result = ctrl) %>%
  gather(key = treatment, value = result, low, high)

# Sizes: the same reshaping, applied to the size column.
mydf_size <- mydf %>%
  select(treatment, gender, size) %>%
  spread(key = treatment, value = size) %>%
  rename(ctrl_size = ctrl) %>%
  gather(key = treatment, value = size, low, high)

# Combine both helper frames on their shared keys, put the columns in the
# desired order, and sort by gender.
mydf_final <- mydf_result %>%
  full_join(mydf_size, by = c("treatment", "gender")) %>%
  select(treatment, gender, ctrl_size, size, ctrl_result, result) %>%
  arrange(gender)

# A tibble: 4 × 6
  treatment gender ctrl_size  size ctrl_result result
      <chr>  <chr>     <dbl> <dbl>       <dbl>  <dbl>
1       low female        10    20        0.15   0.38
2      high female        10    30        0.15   0.55
3       low   male        10    20        0.11   0.32
4      high   male        10    30        0.11   0.25

以上操作能否在单个管道(pipeline)内完成?

虽然我不确定想要的结果是否是最整洁的排列,但您可以这样重新排列:

library(tidyverse)

mydf %>%
  # Stack both measurements (size, result) into a single key/value pair.
  gather(key = measure, value = amount, size, result) %>%
  # One column per treatment level, so ctrl sits beside high and low.
  spread(key = treatment, value = amount) %>%
  # Fold high/low back into rows; ctrl remains a separate column.
  gather(key = treatment, value = ttmt, high, low) %>%
  # Stack the ctrl and ttmt values, tagging each with its source column.
  gather(key = arm, value = amount, ctrl, ttmt) %>%
  # Build the final column labels, e.g. "ctrl_size" and "ttmt_result".
  unite(col = measure, arm, measure) %>%
  # Spread to wide form: one column per label.
  spread(key = measure, value = amount)

## # A tibble: 4 × 6
##   gender treatment ctrl_result ctrl_size ttmt_result ttmt_size
## *  <chr>     <chr>       <dbl>     <dbl>       <dbl>     <dbl>
## 1 female      high        0.15        10        0.55        30
## 2 female       low        0.15        10        0.38        20
## 3   male      high        0.11        10        0.25        30
## 4   male       low        0.11        10        0.32        20

这也可以通过 data.table 的连接(join)来完成:

library(data.table)
# Join each non-control row to the control row of the same gender, drop the
# redundant control label, and rename the control columns. setDT() converts
# mydf to a data.table by reference; the trailing [] prints the result.
setnames(
  setDT(mydf)[treatment != "ctrl"][
    mydf[treatment == "ctrl"],
    on = "gender"
  ][, i.treatment := NULL],
  old = c("i.size", "i.result"),
  new = c("ctrl_size", "ctrl_result")
)[]
#    treatment gender size result ctrl_size ctrl_result
#1:       low   male   20   0.32        10        0.11
#2:      high   male   30   0.25        10        0.11
#3:       low female   20   0.38        10        0.15
#4:      high female   30   0.55        10        0.15