R/tidyr:转向更宽的格式并返回更长的格式以完成年份数据

R/tidyr: Pivot to wider format and back to longer format to complete year data

我生成的 table 看起来像这样:

my_data <- tibble(Year = c(rep(2020, 4), rep(2021, 12)), 
       Month = c(lubridate::month(1:4, label = TRUE), lubridate::month(1:12, label = TRUE)), 
       foo = 16:1, bar = 1:16)

    Year Month   foo   bar
   <dbl> <ord> <int> <int>
 1  2020 Jan      16     1
 2  2020 Feb      15     2
 3  2020 Mär      14     3
 4  2020 Apr      13     4
 5  2021 Jan      12     5
 6  2021 Feb      11     6
 7  2021 Mär      10     7
 8  2021 Apr       9     8
 9  2021 Mai       8     9
10  2021 Jun       7    10
11  2021 Jul       6    11
12  2021 Aug       5    12
13  2021 Sep       4    13
14  2021 Okt       3    14
15  2021 Nov       2    15
16  2021 Dez       1    16

我现在想使用 pivot_wider 将我的 table 转换为宽格式,然后转回较长格式,以便在我的 table 中包含缺失的月份。 foo 和 bar 中的值应为 0.

tidyr::pivot_wider(my_data, names_from = Year, values_from = foo:bar, values_fill = 0)

  Month foo_2020 foo_2021 bar_2020 bar_2021
   <ord>    <int>    <int>    <int>    <int>
 1 Jan         16       12        1        5
 2 Feb         15       11        2        6
 3 Mär         14       10        3        7
 4 Apr         13        9        4        8
 5 Mai          0        8        0        9
 6 Jun          0        7        0       10
 7 Jul          0        6        0       11
 8 Aug          0        5        0       12
 9 Sep          0        4        0       13
10 Okt          0        3        0       14
11 Nov          0        2        0       15
12 Dez          0        1        0       16

预期输出:

    Year Month   foo   bar
   <dbl> <ord> <dbl> <dbl>
 1  2020 Jan      16     1
 2  2020 Feb      15     2
 3  2020 Mär      14     3
 4  2020 Apr      13     4
 5  2020 Mai       0     0
 6  2020 Jun       0     0
 7  2020 Jul       0     0
 8  2020 Aug       0     0
 9  2020 Sep       0     0
10  2020 Okt       0     0
11  2020 Nov       0     0
12  2020 Dez       0     0
13  2021 Jan      12     5
14  2021 Feb      11     6
15  2021 Mär      10     7
16  2021 Apr       9     8
17  2021 Mai       8     9
18  2021 Jun       7    10
19  2021 Jul       6    11
20  2021 Aug       5    12
21  2021 Sep       4    13
22  2021 Okt       3    14
23  2021 Nov       2    15
24  2021 Dez       1    16

使用 pivot_longer() 函数中的“names_sep”选项

df <- structure(list(Month = c("Jan", "Feb", "Mär", "Apr", "Mai", "Jun", "Jul", "Aug", "Sep", "Okt", "Nov", "Dez"), 
                 foo_2020 = c(16L, 15L, 14L, 13L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
                 foo_2021 = 12:1, 
                 bar_2020 = c(1L, 2L, 3L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
                 bar_2021 = 5:16), class = "data.frame", row.names = c(NA, -12L))

library(tidyr)    

pivot_longer(df, -Month, names_sep = "_", names_to=c(".value", "Year"))

您不需要旋转两次。这就是 tidyr::complete 的用途:

complete(my_data, expand(my_data, Year, Month), fill = list(foo = 0, bar = 0))
#>    Year Month foo bar
#> 1  2020   Jan  16   1
#> 2  2020   Feb  15   2
#> 3  2020   Mar  14   3
#> 4  2020   Apr  13   4
#> 5  2020   May   0   0
#> 6  2020   Jun   0   0
#> 7  2020   Jul   0   0
#> 8  2020   Aug   0   0
#> 9  2020   Sep   0   0
#> 10 2020   Oct   0   0
#> 11 2020   Nov   0   0
#> 12 2020   Dec   0   0
#> 13 2021   Jan  12   5
#> 14 2021   Feb  11   6
#> 15 2021   Mar  10   7
#> 16 2021   Apr   9   8
#> 17 2021   May   8   9
#> 18 2021   Jun   7  10
#> 19 2021   Jul   6  11
#> 20 2021   Aug   5  12
#> 21 2021   Sep   4  13
#> 22 2021   Oct   3  14
#> 23 2021   Nov   2  15
#> 24 2021   Dec   1  16

另一个可能的解决方案:

library(tidyverse)

months <- c("Jan", "Feb", "Mar", "Apr", "Mai", "Jun", "Jul", "Aug", "Sep", "Okt", "Nov", "Dez")

my_data %>% 
  group_by(Year) %>% 
  group_modify(~ add_row(.x, Month = setdiff(months, .x$Month))) %>% 
  ungroup %>% 
  mutate(across(foo:bar, replace_na, 0))

#> # A tibble: 27 × 4
#>     Year Month   foo   bar
#>    <dbl> <chr> <int> <int>
#>  1  2020 Jan      16     1
#>  2  2020 Feb      15     2
#>  3  2020 Mar      14     3
#>  4  2020 Apr      13     4
#>  5  2020 Mai       0     0
#>  6  2020 Jun       0     0
#>  7  2020 Jul       0     0
#>  8  2020 Aug       0     0
#>  9  2020 Sep       0     0
#> 10  2020 Okt       0     0
#> # … with 17 more rows