R pivot_longer() 在输出中有 NAs

R pivot_longer() has NAs in output

我在旋转数据时遇到问题。我可以使用一种解决方法来生成我想要的输出(如下所示),但我怀疑在单个 pivot_longer() 调用中是可能的,我只是无法弄清楚命令。任何 advice/help 将不胜感激。

library(dplyr)
library(tidyr)

## Data
df <- structure(list(id_name = c("ID_108", "ID_109", "ID_112", 
                                 "ID_113", "ID_115", "ID_116", "ID_119", "ID_12", "ID_132", 
                                 "ID_134", "ID_142", "ID_145", "ID_15", "ID_151", "ID_152", 
                                 "ID_158", "ID_163", "ID_170", "ID_173", "ID_179", "ID_191", 
                                 "ID_199", "ID_202", "ID_203", "ID_204", "ID_205", "ID_208", 
                                 "ID_214", "ID_215", "ID_216", "ID_225", "ID_235", "ID_236", 
                                 "ID_241", "ID_242", "ID_26", "ID_28", "ID_37", "ID_44", 
                                 "ID_45", "ID_49", "ID_51", "ID_54", "ID_55", "ID_57", 
                                 "ID_60", "ID_64", "ID_69", "ID_71", "ID_72", "ID_75", 
                                 "ID_88", "ID_9", "ID_98"), 
                     thing_time_1.tab = c(NA, NA, 128, 91, 81, 112, NA, 142, NA, 
                                          NA, 371, NA, 0, 127, 207, 92, NA, NA, 
                                          554, 435, 830, 89, 112, 291, 133, NA, 
                                          197, 130, 484, NA, NA, NA, 126, NA, NA, 
                                          94, 98, NA, NA, 168, NA, 48, NA, 146, 54,
                                          NA, 173, 30, 425, NA, 58, 201, NA, NA), 
                     thing_1.tab = c(NA, NA, "(AB)", "(AB)", "(BB)", "(BB)", NA, "(AB)", NA, NA, "(BB)",
                                     NA, "(AB)", "(AB)", "(AB)", "(BA)", NA, NA, "(BB)", "(BB)", "(BB)",
                                     "(AB)", "(AB)", "(BB)", "(BB)", NA, "(AB)", "(AB)", "(BB)", NA, 
                                     NA, NA, "(BA)", NA, NA, "(AB)", "(BB)", NA, NA, "(BA)", NA, "(AB)", 
                                     NA, "(BA)", "(BA)", NA, "(AB)", "(BB)", "(BB)", NA, "(AB)", "(AB)", 
                                     NA, NA),
                     thing_time_2.tab = c(NA, 212, NA, 183, NA, 118, NA,  NA, 5519, 157, NA, NA, 437, NA, 
                                          NA, 359, NA, 342, NA, NA, NA, 179, NA, 386, 176, 47, NA, 177,
                                          561, NA, NA, NA, NA, NA, 70, NA, NA, 68, NA, 238, NA, 194, NA, 
                                          301, NA, NA, 263, 243, 750,  904, 126, 399, NA, 124), 
                     thing_2.tab = c(NA, "(BA)", NA, "(BB)", NA, "(BA)", NA, NA, "(BB)", "(AA)", NA, NA, 
                                     "(BA)", NA, NA, "(BB)", NA, "(BB)", NA, NA, NA, "(AB)", NA, "(BB)",
                                     "(BB)", "(BB)", NA, "(BB)", "(BB)", NA, NA, NA, NA, NA, "(BB)", NA, 
                                     NA, "(BA)", NA, "(BB)", NA, "(AB)", NA, "(AB)", NA, NA, "(BB)", "(BA)", 
                                     "(BB)", "(BA)", "(AB)", "(BB)", NA, "(BA)"), 
                     thing_time_3.tab = c(167, 251, NA, 259, NA, NA, NA, NA, 5580, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, 370, NA, NA, NA, NA, NA, 411, NA, 174, NA, NA, 656, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, 120, NA, NA, NA, NA, 334, NA, NA, NA, 
                                          NA, NA, NA, 941, NA, NA, NA, 192), 
                     thing_3.tab = c("(BB)", "(AB)", NA, "(BB)", NA, NA, NA, NA, "(AB)", NA, NA, NA, NA, NA, NA, NA, 
                                     NA, "(BB)", NA, NA, NA, NA, NA, "(BB)", NA, "(BB)", NA, NA, "(BA)", 
                                     NA, NA, NA, NA, NA, NA, NA, NA, "(AB)", NA, NA, NA, NA, "(AB)", 
                                     NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, "(AB)"), 
                     thing_time_4.tab = c(NA,  575, NA, NA, NA, NA, NA, NA, 5615, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, 401, NA, NA, NA, NA, NA, NA, 232, NA, NA, NA, NA, 319, NA, 
                                          NA, NA, NA, NA, NA, NA, 215, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, NA, 223),
                     thing_4.tab = c(NA, "(BB)", NA, NA, NA, NA, NA, NA, "(AB)", NA, NA, NA, NA, NA, NA, NA, NA, 
                                     "(BB)", NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, NA, "(BB)", 
                                     NA, NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA, NA, NA, "(BB)"), 
                     thing_time_5.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          421, NA, NA, NA, NA, NA, NA, 278, NA, NA, NA, NA, 370, NA, NA, 
                                          NA, 451, NA, NA, NA, 280, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, 507),
                     thing_5.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "(BB)", NA, 
                                     NA, NA, NA, NA, NA, "(BA)", NA, NA, NA, NA, "(BB)", NA, NA, NA, 
                                     "(BB)", NA, NA, NA, "(BB)", NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA, "(BB)"), 
                     thing_time_6.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, 445, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA),
                     thing_6.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, "(BA)", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA), 
                     thing_time_7.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, 576, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
                     thing_7.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "(BB)", 
                                     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA), 
                     thing_time_8.tab = c(NA, NA, NA, NA,  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 719, NA, NA, NA, 
                                          NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                          NA, NA, NA), 
                     thing_8.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, NA, NA, NA, NA, 
                                     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), 
                row.names = c(NA, -54L), class = "data.frame")

我想要的结果(df_2"):

## Outcome I want ("df_2")
part1 <- df %>%
  pivot_longer(-id_name,
               names_to = c("item", ".value"),
               names_sep = "[_.]",
               values_transform = list(.value = as.character)) %>%
  select(id_name, time) %>%
  na.omit()
#> Warning: Expected 2 pieces. Additional pieces discarded in 16 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16].

part2 <- df %>%
  pivot_longer(-id_name,
               names_to = c("item", ".value"),
               names_sep = "[_.]",
               values_transform = list(.value = as.character)) %>%
  select(-c(item, time)) %>%
  pivot_longer(-id_name) %>%
  select(-name) %>%
  na.omit()
#> Warning: Expected 2 pieces. Additional pieces discarded in 16 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16].

df_2 <- part2 %>%
  bind_cols(part1) %>%
  select(-`id_name...3`) %>%
  rename("ID" = "id_name...1",
         "Thing" = "value",
         "Time" = "time")
#> New names:
#> • `id_name` -> `id_name...1`
#> • `id_name` -> `id_name...3`
df_2
#> # A tibble: 86 × 3
#>    ID     Thing  Time
#>    <chr>  <chr> <dbl>
#>  1 ID_108 (BB)    167
#>  2 ID_109 (BA)    212
#>  3 ID_109 (AB)    251
#>  4 ID_109 (BB)    575
#>  5 ID_112 (AB)    128
#>  6 ID_113 (AB)     91
#>  7 ID_113 (BB)    183
#>  8 ID_113 (BB)    259
#>  9 ID_115 (BB)     81
#> 10 ID_116 (BB)    112
#> # … with 76 more rows

我的尝试:

## My failed attempts:
df %>%
  pivot_longer(-id_name,
               names_to = c("item", ".value"),
               names_pattern = "(.+_.+)\_(\d?)")
#> # A tibble: 486 × 10
#>    id_name item         `1`   `2`   `3`   `4`   `5`   `6`   `7`   `8`
#>    <chr>   <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 ID_108  thing_time    NA    NA   167    NA    NA    NA    NA    NA
#>  2 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#>  3 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#>  4 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#>  5 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#>  6 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#>  7 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#>  8 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#>  9 ID_108  <NA>          NA    NA    NA    NA    NA    NA    NA    NA
#> 10 ID_109  thing_time    NA   212   251   575    NA    NA    NA    NA
#> # … with 476 more rows

## pivot-then-coalesce? Also no good...
df %>%
  pivot_longer(-id_name,
               names_to = c("item", ".value"),
               names_sep = "[_.]",
               values_transform = list(.value = as.character)) %>%
  group_by(id_name) %>%
  summarise(across(-item, ~max(., na.rm = TRUE)))
#> Warning: Expected 2 pieces. Additional pieces discarded in 16 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16].
#> # A tibble: 54 × 10
#>    id_name  time `1`   `2`   `3`   `4`   `5`   `6`   `7`   `8`  
#>    <chr>   <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1 ID_108    167 <NA>  <NA>  (BB)  <NA>  <NA>  <NA>  <NA>  <NA> 
#>  2 ID_109    575 <NA>  (BA)  (AB)  (BB)  <NA>  <NA>  <NA>  <NA> 
#>  3 ID_112    128 (AB)  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
#>  4 ID_113    259 (AB)  (BB)  (BB)  <NA>  <NA>  <NA>  <NA>  <NA> 
#>  5 ID_115     81 (BB)  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
#>  6 ID_116    118 (BB)  (BA)  <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
#>  7 ID_119   -Inf <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
#>  8 ID_12     142 (AB)  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
#>  9 ID_132   5615 <NA>  (BB)  (AB)  (AB)  <NA>  <NA>  <NA>  <NA> 
#> 10 ID_134    157 <NA>  (AA)  <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
#> # … with 44 more rows

reprex package (v2.0.1)

于 2022-05-24 创建

这对你有用吗?

df %>%
   pivot_longer(-id_name,
                names_to = c(".value", "number"),
                names_pattern =   "([_a-z]+)_(\d)",
                values_transform = list(.value = as.character)) %>%
   # select(-number) %>%
   na.omit() 

# A tibble: 86 × 4
id_name number thing_time thing
<chr>   <chr>       <dbl> <chr>
1 ID_108  3             167 (BB) 
2 ID_109  2             212 (BA) 
3 ID_109  3             251 (AB) 
4 ID_109  4             575 (BB) 
5 ID_112  1             128 (AB) 
6 ID_113  1              91 (AB) 
7 ID_113  2             183 (BB) 
8 ID_113  3             259 (BB) 
9 ID_115  1              81 (BB) 
10 ID_116  1             112 (BB)

它确实涉及一些 clean-up 的“thing-time”列标题。