如何在 R 中使用 pivot_longer 对数据集的多列进行转换

How do we transform a dataset in R using pivot_longer with multiple columns

我已经查看了很多 SO 上的帖子,但还没有找到能满足我需求的解决方法。

初始数据集如下:

# Input data: one row per person, holding the initial event plus optional
# "prior" and "after" visits. Every column is character; an empty string
# marks a visit that did not happen.
df <- tibble(
  person             = c("a", "b", "c", "d"),
  initial_event_date = c("01-01-2020", "01-17-2020", "02-11-2020",
                         "04-01-2020"),
  type_initial       = c("repair", "routine", "consult", "repair"),
  visit_prior        = c("N", "Y", "Y", "N"),
  day_cnt_prior      = c("", "-4", "-2", ""),
  prior_visit_type   = c("", "repair", "routine", ""),
  visit_after        = c("Y", "N", "Y", "Y"),
  day_cnt_after      = c("15", "", "22", "12"),
  visit_after_type   = c("follow-up", "", "follow-up", "correction")
)

我想输出类似于下面的数据框,因为我打算使用 timevis 包在基于时间的绘图上可视化数据。

# Desired long format: one row per visit. The event date is filled in only
# on each person's "initial" row; day_cnt is the offset in days from it.
output <- tribble(
  ~person, ~event_date,  ~instance,     ~type,        ~day_cnt,
  "a",     "01-01-2020", "initial",     "repair",     "0",
  "a",     "",           "visit_after", "follow-up",  "15",
  "b",     "01-17-2020", "initial",     "routine",    "0",
  "b",     "",           "visit_prior", "repair",     "-4",
  "c",     "02-11-2020", "initial",     "consult",    "0",
  "c",     "",           "visit_prior", "routine",    "-2",
  "c",     "",           "visit_after", "follow-up",  "22",
  "d",     "04-01-2020", "initial",     "repair",     "0",
  "d",     "",           "visit_after", "correction", "12"
)

我尝试了 pivot_longer 的多种变体,例如:

# Attempt: stack the three visit-type columns into name/value pairs.
# The former column name goes to `instance`, the cell value to `day_cnt`.
pivot_longer(
  df,
  cols = c(type_initial, prior_visit_type, visit_after_type),
  names_to = "instance",
  values_to = "day_cnt"
)

有没有什么建议,或者其他 SO 帖子能够指向我正在寻找的解决方案?

下面的代码可以作为一个起点:

library(tidyr)

# Split each selected column name on "_": the first piece names the output
# column (".value") and the second piece is stored in `instance`.
# Names with more than two pieces (e.g. initial_event_date) do not fit this
# two-part pattern cleanly, so treat the result as a starting point only.
pivot_longer(
  df,
  cols = c(
    initial_event_date,
    type_initial,
    prior_visit_type,
    visit_after_type,
    day_cnt_after
  ),
  names_to = c(".value", "instance"),
  names_sep = "_"
)

示例数据:

# The same example data in dput() form: a plain list of character columns
# tagged with the tibble classes, so no constructor function is required.
df <- structure(
  list(
    person = c("a", "b", "c", "d"),
    initial_event_date = c(
      "01-01-2020", "01-17-2020", "02-11-2020", "04-01-2020"
    ),
    type_initial = c("repair", "routine", "consult", "repair"),
    visit_prior = c("N", "Y", "Y", "N"),
    day_cnt_prior = c("", "-4", "-2", ""),
    prior_visit_type = c("", "repair", "routine", ""),
    visit_after = c("Y", "N", "Y", "Y"),
    day_cnt_after = c("15", "", "22", "12"),
    visit_after_type = c("follow-up", "", "follow-up", "correction")
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)

输出:

person visit_prior day_cnt_prior visit_after instance initial    type    prior     visit        day  
   <chr>  <chr>       <chr>         <chr>       <chr>    <chr>      <chr>   <chr>     <chr>        <chr>
 1 a      N           ""            Y           event    01-01-2020 NA       NA        NA           NA  
 2 a      N           ""            Y           initial  NA         repair   NA        NA           NA  
 3 a      N           ""            Y           visit    NA         NA      ""         NA           NA  
 4 a      N           ""            Y           after    NA         NA       NA       "follow-up"   NA  
 5 a      N           ""            Y           cnt      NA         NA       NA        NA          "15" 
 6 b      Y           "-4"          N           event    01-17-2020 NA       NA        NA           NA  
 7 b      Y           "-4"          N           initial  NA         routine  NA        NA           NA  
 8 b      Y           "-4"          N           visit    NA         NA      "repair"   NA           NA  
 9 b      Y           "-4"          N           after    NA         NA       NA       ""            NA  
10 b      Y           "-4"          N           cnt      NA         NA       NA        NA          ""   
11 c      Y           "-2"          Y           event    02-11-2020 NA       NA        NA           NA  
12 c      Y           "-2"          Y           initial  NA         consult  NA        NA           NA  
13 c      Y           "-2"          Y           visit    NA         NA      "routine"  NA           NA  
14 c      Y           "-2"          Y           after    NA         NA       NA       "follow-up"   NA  
15 c      Y           "-2"          Y           cnt      NA         NA       NA        NA          "22" 
16 d      N           ""            Y           event    04-01-2020 NA       NA        NA           NA  
17 d      N           ""            Y           initial  NA         repair   NA        NA           NA  
18 d      N           ""            Y           visit    NA         NA      ""         NA           NA  
19 d      N           ""            Y           after    NA         NA       NA       "correction"  NA  
20 d      N           ""            Y           cnt      NA         NA       NA        NA          "12" 

可能不是最优雅的解决方案,但我能够使用以下步骤解决我自己的问题:

library(dplyr)

# Reshape `df` (one row per person) into one row per visit by building a
# small table for each visit kind with identical column names and stacking
# them. Giving every piece the same schema up front removes the need to
# coalesce differently named columns after binding.

# Initial event: always present, and by definition 0 days from itself.
initial_visits <- df %>%
  select(person, initial_event_date, visit_reason = type_initial) %>%
  mutate(visit_type = "initial", day_cnt = 0L)

# Prior visit: only for people flagged with visit_prior == "Y".
prior_visits <- df %>%
  filter(visit_prior == "Y") %>%
  select(
    person, initial_event_date,
    visit_reason = prior_visit_type,
    day_cnt = day_cnt_prior
  ) %>%
  mutate(visit_type = "visit_prior", day_cnt = as.integer(day_cnt))

# Follow-up visit: only for people flagged with visit_after == "Y".
# (Named after_visits rather than `c` so base::c() is not masked.)
after_visits <- df %>%
  filter(visit_after == "Y") %>%
  select(
    person, initial_event_date,
    visit_reason = visit_after_type,
    day_cnt = day_cnt_after
  ) %>%
  mutate(visit_type = "visit_after", day_cnt = as.integer(day_cnt))

# arrange() keeps tied rows in their incoming order, so each person's rows
# come out as initial, then prior, then after. A day count that is missing
# on a real prior/after visit stays NA instead of being turned into 0,
# which would make it look like the initial event.
bind_rows(initial_visits, prior_visits, after_visits) %>%
  arrange(person) %>%
  select(person, initial_event_date, visit_type, visit_reason, day_cnt)