R 中 pivot_longer 的使用问题

Pivot Longer in R Issues

我在这里查看了很多关于 pivot_longer 的问题,但仍然无法弄清楚如何让它适用于我的情况。

例如,这是我的数据样本:

structure(list(co_number = c("C953543", "C988782", "C999839", 
"C1000378", "C1004886", "C939949"), co_rejection_date_1 = structure(c(NA, 
NA, NA, NA, 1645401600, 1637020800), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), co_rejection_category_1 = c(NA, NA, NA, NA, "CO Error", 
"CO Error"), co_rejector_1 = c(NA, NA, NA, NA, "Quality Check", 
"Quality Check"), co_rejection_rationale_1 = c(NA, NA, NA, NA, 
"Add Tech Pubs approver.", "Updated approvers."), co_rejection_date_2 = structure(c(NA, 
NA, NA, NA, NA, 1637280000), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), co_rejection_category_2 = c(NA, NA, NA, NA, NA, "CO Error"
), co_rejector_2 = c(NA, NA, NA, NA, NA, "Labeling"), co_rejection_rationale_2 = c(NA, 
NA, NA, NA, NA, "Need to correct CO number on drawing redlines."
), co_rejection_date_3 = structure(c(NA, NA, NA, NA, NA, 1638835200
), tzone = "UTC", class = c("POSIXct", "POSIXt")), co_rejection_category_3 = c(NA, 
NA, NA, NA, NA, "CO Error"), co_rejector_3 = c(NA, NA, NA, NA, 
NA, "Labeling"), co_rejection_rationale_3 = c(NA, NA, NA, NA, 
NA, "ZFIN 100170251: removed VI addendum config. Updated CA addendum config matrix attachment."
), co_rejection_date_4 = structure(c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), co_rejection_category_4 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
    co_rejector_4 = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), co_rejection_rationale_4 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), co_rejection_date_5 = structure(c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), tzone = "UTC", class = c("POSIXct", 
    "POSIXt")), co_rejection_category_5 = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_
    ), co_rejector_5 = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), co_rejection_rationale_5 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), release_year = c("2022", "2022", "2022", 
    "2022", "2022", "2022")), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))

我希望它看起来像:

co_number co_rejection_number co_rejection_date co_rejection_category co_rejection_rejector co_rejection_rationale
C953543 NA NA NA NA NA
C988782 NA NA NA NA NA
C999839 NA NA NA NA NA
C1000378 NA NA NA NA NA
C1004886 1 2022-02-21 CO Error Quality Check Add Tech Pubs approver.
C939949 1 2021-11-16 CO Error Quality Check Updated approvers.
C939949 2 2021-11-19 CO Error Labeling Need to correct CO number on drawing redlines.
C939949 3 2021-12-07 CO Error Labeling ZFIN 100170251: removed VI addendum config. Updated CA addendum config matrix attachment.

您需要使用 `names_pattern` 参数,把列名拆分成字段名和拒绝编号两部分:

library(tidyr)

# Reshape the wide rejection columns (<field>_<n>) into one row per
# co_number / rejection number. `df` is the sample data shown in the question.
#
# names_pattern splits every column name into two capture groups:
#   group 1 -> ".value": the field name becomes an output column
#   group 2 -> "co_rejection_number": the trailing numeric index
# The backslash has to be doubled inside an R string literal ("\d" is a parse
# error). `\\d+$` anchors on the trailing digits, so indices with more than
# one digit (e.g. `_10`) are captured in full.
# values_drop_na = TRUE removes the rows whose reshaped values are all NA.
answer <- pivot_longer(
  df,
  cols = -c(co_number, release_year),
  names_pattern = "(.+)_(\\d+)$",
  names_to = c(".value", "co_rejection_number"),
  values_drop_na = TRUE
)

tail(answer, 10)
# A tibble: 4 × 7
  co_number release_year co_rejection_number co_rejection_date   co_rejection_category co_rejector   co_rejection_rationale                                                            
  <chr>     <chr>        <chr>               <dttm>              <chr>                 <chr>         <chr>                                                                             
1 C1004886  2022         1                   2022-02-21 00:00:00 CO Error              Quality Check Add Tech Pubs approver.                                                           
2 C939949   2022         1                   2021-11-16 00:00:00 CO Error              Quality Check Updated approvers.                                                                
3 C939949   2022         2                   2021-11-19 00:00:00 CO Error              Labeling      Need to correct CO number on drawing redlines.                                    
4 C939949   2022         3                   2021-12-07 00:00:00 CO Error              Labeling      ZFIN 100170251: removed VI addendum config. Updated CA addendum config matrix att…

现在把没有任何拒绝记录(因此在上一步被删除)的 co_number 补回来:

# full_join() and distinct() come from dplyr, which must be attached.
library(dplyr)

# `answer` no longer contains the co_numbers whose rejection columns were all
# NA. Join the distinct (co_number, release_year) pairs from the original data
# back in so those records reappear with NA in every rejection column.
# The join keys are given explicitly instead of relying on dplyr guessing them.
final <- full_join(
  answer,
  distinct(df[, c("co_number", "release_year")]),
  by = c("co_number", "release_year")
)
final

 co_number release_year co_rejection_number co_rejection_date   co_rejection_category co_rejector   co_rejection_rationale                                                            
  <chr>     <chr>        <chr>               <dttm>              <chr>                 <chr>         <chr>                                                                             
1 C1004886  2022         1                   2022-02-21 00:00:00 CO Error              Quality Check Add Tech Pubs approver.                                                           
2 C939949   2022         1                   2021-11-16 00:00:00 CO Error              Quality Check Updated approvers.                                                                
3 C939949   2022         2                   2021-11-19 00:00:00 CO Error              Labeling      Need to correct CO number on drawing redlines.                                    
4 C939949   2022         3                   2021-12-07 00:00:00 CO Error              Labeling      ZFIN 100170251: removed VI addendum config. Updated CA addendum config matrix att…
5 C953543   2022         NA                  NA                  NA                    NA            NA                                                                                
6 C988782   2022         NA                  NA                  NA                    NA            NA                                                                                
7 C999839   2022         NA                  NA                  NA                    NA            NA                                                                                
8 C1000378  2022         NA                  NA                  NA                    NA            NA                                                                                

     

另一种方法是使用 data.table;您还可以给 `melt()` 添加参数 `na.rm = TRUE` 来去掉全为 NA 的行。

library(data.table)

# NOTE(review): `Test` is presumably the data frame from the question; rename
# it to match the object that holds your data.
DT <- as.data.table(Test)

# Melt the four families of wide columns at once. Each pattern in `patterns()`
# yields one value column, and `value.name` is matched to the patterns by
# position, so both vectors must list the fields in the same order:
# date, category, rejector, rationale.
# `variable.name` receives the index of the column within each family.
# Add `na.rm = TRUE` to drop the rows where every value column is NA.
DT <- melt(
  DT,
  id.vars = "co_number",
  measure.vars = patterns(
    "^co_rejection_date.*",
    "^co_rejection_category.*",
    "^co_rejector.*",
    "^co_rejection_rationale.*"
  ),
  variable.name = "co_rejection_number",
  value.name = c(
    "co_rejection_date",
    "co_rejection_category",
    "co_rejection_rejector",
    "co_rejection_rationale"
  )
)