R:将宽格式数据框转换为包含两列以上的纵向(长)格式

R: Converting a wide data frame into longitudinal format with more than two columns

我有如下数据框:

> head(n)
# A tibble: 6 x 23
  `Record ID of REGN DATA` Pain_1 Pain_2 Redness_1 Redness_2 Swelling_1 Swelling_2
                     <dbl> <chr>  <chr>  <chr>     <chr>     <chr>      <chr>     
1                        1 Yes    Yes    No        No        No         No        
2                        2 No     Yes    No        No        No         No        
3                        3 Yes    No     No        No        No         No        
4                        4 Yes    Yes    No        No        Yes        Yes       
5                        5 No     No     No        No        No         No        
6                        6 No     No     No        No        No         No       

Pain_1 和 Pain_2 分别是患者第一次就诊和第二次就诊时对症状“疼痛”的回答。变量 Redness 和 Swelling 也是如此。我想把数据框转换为纵向格式:每个症状(Pain、Redness、Swelling)各占一列,并增加一个标志变量来表示就诊次数,如下所示。我尝试过使用 gather 函数,但它会把所有症状都转换到同一列中。有人能帮忙吗?

> head(tr)
# A tibble: 6 x 5
  `Record ID of REGN DATA` Pain  Redness Swelling Visit
                     <dbl> <chr> <chr>   <chr>    <dbl>
1                        1 Yes   No      No           1
2                        2 No    No      No           1
3                        3 Yes   No      No           1
4                        1 Yes   No      No           2
5                        2 Yes   No      No           2
6                        3 No    No      No           2

这是示例数据

# Sample data as a structure() literal (the form produced by dput()):
# 10 records with one ID column plus 11 symptoms, each stored in two
# columns suffixed `_1` and `_2` (first and second visit), 23 columns total.
structure(list(`Record ID of REGN DATA` = c(1, 2, 3, 4, 5, 6, 
7, 8, 9, 10), Pain_1 = c("Yes", "No", "Yes", "Yes", "No", "No", 
"Yes", "Yes", "Yes", "Yes"), Pain_2 = c("Yes", "Yes", "No", "Yes", 
"No", "No", "No", "Yes", "Yes", "Yes"), Redness_1 = c("No", "No", 
"No", "No", "No", "No", "Yes", "Yes", "No", "No"), Redness_2 = c("No", 
"No", "No", "No", "No", "No", "No", "Yes", "No", "No"), Swelling_1 = c("No", 
"No", "No", "Yes", "No", "No", "No", "Yes", "No", "Yes"), Swelling_2 = c("No", 
"No", "No", "Yes", "No", "No", "No", "Yes", "No", "Yes"), Tiredness_1 = c("Yes", 
"No", "Yes", "Yes", "No", "No", "No", "No", "No", "Yes"), Tiredness_2 = c("Yes", 
"Yes", "No", "Yes", "No", "No", "Yes", "No", "Yes", "Yes"), Headache_1 = c("No", 
"No", "No", "Yes", "No", "No", "No", "No", "No", "No"), Headache_2 = c("No", 
"Yes", "No", "Yes", "No", "No", "No", "Yes", "No", "No"), Muscle_1 = c("Yes", 
"No", "Yes", "Yes", "No", "No", "No", "No", "No", "No"), Muscle_2 = c("Yes", 
"Yes", "No", "No", "No", "No", "Yes", "Yes", "No", "No"), Chills_1 = c("No", 
"No", "Yes", "No", "No", "No", "No", "No", "No", "No"), Chills_2 = c("No", 
"Yes", "No", "No", "No", "No", "Yes", "No", "No", "No"), Fever_1 = c("Yes", 
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes"), Fever_2 = c("Yes", 
"Yes", "No", "No", "No", "No", "Yes", "No", "No", "No"), Nausea_1 = c("No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No"), Nausea_2 = c("No", 
"No", "No", "No", "No", "No", "No", "Yes", "No", "No"), JointPain_1 = c("Yes", 
"No", "Yes", "No", "No", "No", "No", "No", "Yes", "No"), JointPain_2 = c("Yes", 
"No", "No", "No", "No", "No", "Yes", "No", "No", "No"), `Allergic reaction_1` = c("No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No"), `Allergic reaction_2` = c("No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No")), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

一种方法是先用 pivot_longer 转为长格式,再用 separate 拆分出就诊次数,然后筛选出感兴趣的变量,最后用 pivot_wider 得到您期望的输出(如果我没有理解错您的需求)。

library(tidyverse)

# Reshape to one row per record and visit, keeping only the symptoms of
# interest. `df` is assumed to hold the wide sample data.
symptoms <- c("Pain", "Redness", "Swelling")

df %>%
  # Stack every symptom/visit column into name-value pairs.
  pivot_longer(!`Record ID of REGN DATA`,
               names_to = "name",
               values_to = "value") %>%
  # Split "<symptom>_<visit>" at the underscore. `visit` stays character;
  # pass `convert = TRUE` to separate() if an integer column is preferred.
  separate(name, c("name", "visit"), sep = "_") %>%
  filter(name %in% symptoms) %>%
  # Spread the symptoms back out into one column each.
  pivot_wider(names_from = "name", values_from = "value") %>%
  # Move `visit` to the end by name rather than by column position, so this
  # step keeps working if the set of symptoms changes.
  relocate(visit, .after = last_col())

输出

# A tibble: 20 × 5
   `Record ID of REGN DATA` Pain  Redness Swelling visit
                      <dbl> <chr> <chr>   <chr>    <chr>
 1                        1 Yes   No      No       1    
 2                        1 Yes   No      No       2    
 3                        2 No    No      No       1    
 4                        2 Yes   No      No       2    
 5                        3 Yes   No      No       1    
 6                        3 No    No      No       2    
 7                        4 Yes   No      Yes      1    
 8                        4 Yes   No      Yes      2    
 9                        5 No    No      No       1    
10                        5 No    No      No       2    
11                        6 No    No      No       1    
12                        6 No    No      No       2    
13                        7 Yes   Yes     No       1    
14                        7 No    No      No       2    
15                        8 Yes   Yes     Yes      1    
16                        8 Yes   Yes     Yes      2    
17                        9 Yes   No      No       1    
18                        9 Yes   No      No       2    
19                       10 Yes   No      Yes      1    
20                       10 Yes   No      Yes      2   

一种更直接的方法是使用 pivot_longer 的 names_sep 参数。names_to 中的 ".value" 表示列名中对应的那一部分(症状名)直接作为输出的列名,另一部分则存入 visit 列:

library(tidyr)

# Each wide column is named "<symptom>_<visit>". Splitting on "_" and
# mapping the first piece to ".value" makes every symptom its own output
# column, while the second piece is stored in `visit`.
pivot_longer(
  df,
  cols = -1,
  names_sep = "_",
  names_to = c(".value", "visit")
)
  `Record ID of REG~ visit Pain  Redness Swelling Tiredness Headache Muscle Chills Fever Nausea JointPain `Allergic react~
                <dbl> <chr> <chr> <chr>   <chr>    <chr>     <chr>    <chr>  <chr>  <chr> <chr>  <chr>     <chr>           
 1                  1 1     Yes   No      No       Yes       No       Yes    No     Yes   No     Yes       No              
 2                  1 2     Yes   No      No       Yes       No       Yes    No     Yes   No     Yes       No              
 3                  2 1     No    No      No       No        No       No     No     No    No     No        No              
 4                  2 2     Yes   No      No       Yes       Yes      Yes    Yes    Yes   No     No        No              
 5                  3 1     Yes   No      No       Yes       No       Yes    Yes    No    No     Yes       No              
 6                  3 2     No    No      No       No        No       No     No     No    No     No        No              
 7                  4 1     Yes   No      Yes      Yes       Yes      Yes    No     No    No     No        No              
 8                  4 2     Yes   No      Yes      Yes       Yes      No     No     No    No     No        No              
 9                  5 1     No    No      No       No        No       No     No     No    No     No        No              
10                  5 2     No    No      No       No        No       No     No     No    No     No        No              
11                  6 1     No    No      No       No        No       No     No     No    No     No        No              
12                  6 2     No    No      No       No        No       No     No     No    No     No        No              
13                  7 1     Yes   Yes     No       No        No       No     No     No    No     No        No              
14                  7 2     No    No      No       Yes       No       Yes    Yes    Yes   No     Yes       No              
15                  8 1     Yes   Yes     Yes      No        No       No     No     No    No     No        No              
16                  8 2     Yes   Yes     Yes      No        Yes      Yes    No     No    Yes    No        No              
17                  9 1     Yes   No      No       No        No       No     No     Yes   No     Yes       No              
18                  9 2     Yes   No      No       Yes       No       No     No     No    No     No        No              
19                 10 1     Yes   No      Yes      Yes       No       No     No     Yes   No     No        No              
20                 10 2     Yes   No      Yes      Yes       No       No     No     No    No     No        No