使用 broom::augment 处理拟合后的模型时如何保留原始列

How to preserve original columns after augmenting a fitted model with broom::augment

我正在拟合一个 GAM,并使用 broom::augment 来获取模型系数。但是,在对模型执行 augment 之后,数据集中的一些原始列丢失了。我之后需要对这些列使用 pivot_longer 并绘制一些图,X 轴为日期,Y 轴为地区。有没有办法保留这些列以供后续使用?下面用一个数据子集来说明我的意思:

#Make a model from the following data
library(tidyverse)
library(broom)
library(mgcv)

# Example data written as a structure() literal: a tibble ("tbl_df") with
# 87 rows and 8 columns (year, period, district, date, pinkcount, airtemp_f,
# watertemp_f, rainfall_inch). `date` holds numeric values tagged with
# class "Date".
test <- structure(list(year = c(2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020), period = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 2, 3, 4, 5, 6, 7, 8, 12, 7, 8, 9, 10, 11, 12, 
13, 15, 19, 20, 21, 22, 3, 7, 11, 12, 13, 14, 15, 16, 19, 21, 
23, 24, 25, 28, 29, 2, 4, 5, 6, 7), district = c(221, 221, 221, 
221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 
221, 221, 221, 221, 221, 221, 221, 222, 222, 222, 222, 222, 222, 
222, 222, 222, 222, 222, 222, 222, 222, 223, 223, 223, 223, 223, 
223, 223, 223, 223, 223, 224, 224, 224, 224, 224, 224, 224, 224, 
226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 227, 
227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 
227, 228, 228, 228, 228, 228), date = structure(c(18454, 18458, 
18459, 18462, 18466, 18467, 18468, 18471, 18475, 18476, 18477, 
18479, 18482, 18485, 18487, 18489, 18491, 18493, 18495, 18498, 
18499, 18500, 18501, 18466, 18471, 18475, 18479, 18482, 18485, 
18487, 18489, 18491, 18493, 18495, 18497, 18499, 18501, 18463, 
18466, 18471, 18475, 18479, 18482, 18485, 18487, 18489, 18491, 
18459, 18462, 18466, 18471, 18475, 18479, 18482, 18491, 18435, 
18438, 18442, 18445, 18447, 18449, 18452, 18456, 18485, 18487, 
18489, 18497, 18421, 18431, 18440, 18442, 18445, 18447, 18449, 
18452, 18459, 18463, 18475, 18479, 18482, 18489, 18491, 18462, 
18471, 18475, 18479, 18482), class = "Date"), pinkcount = c(2153732, 
2074853, 526934, 721057, 728884, 116370, 231951, 750551, 803772, 
76330, 105696, 386568, 156750, 8243, 26856, 16036, 40262, 24827, 
6404, 2183, 1206, 1438, 2888, 177054, 392059, 179505, 472985, 
489168, 639106, 341042, 169540, 153864, 86039, 126288, 146441, 
36259, 15654, 100867, 51319, 78043, 51462, 57584, 33914, 470795, 
197139, 47556, 17062, 93562, 123367, 55249, 175281, 190319, 188759, 
64537, 1871, 5, 158, 171, 224, 973, 1547, 1395, 262, 1145554, 
829993, 634609, 104113, 3, 8, 8, 134, 868, 452, 14899, 1848, 
2449, 10365, 64608, 64016, 56468, 16191, 17130, 37070, 127510, 
139808, 29274, 9985), airtemp_f = c(56.675652173913, 57.7475, 
57.1775, 57.035, 56.5025, 56.765, 54.92, 57.572, 58.22, 57.2975, 
56.585, 56.6075, 55.7375, 56.12, 56.63, 59.8025, 58.2330434782609, 
57.11, 61.82, 56.6825, 55.97, 55.7825, 54.875, 56.5025, 57.572, 
58.22, 56.6075, 55.7375, 56.12, 56.63, 59.8025, 58.2330434782609, 
57.11, 61.82, 57.59, 55.97, 54.875, 56.7725, 56.5025, 57.572, 
58.22, 56.6075, 55.7375, 56.12, 56.63, 59.8025, 58.2330434782609, 
57.1775, 57.035, 56.5025, 57.572, 58.22, 56.6075, 55.7375, 58.2330434782609, 
52.2275, 51.11, 52.205, 62.39, 59.18, 55.0475, 55.58, 55.37, 
56.12, 56.63, 59.8025, 57.59, 47.9225, 51.3425, 54.6575, 52.205, 
62.39, 59.18, 55.0475, 55.58, 57.1775, 56.7725, 58.22, 56.6075, 
55.7375, 59.8025, 58.2330434782609, 57.035, 57.572, 58.22, 56.6075, 
55.7375), watertemp_f = c(58.2408695652174, 58.325, 59.405, 59.405, 
57.875, 58.2575, 57.3575, 58.52, 57.935, 57.995, 57.2825, 57.29, 
56.915, 56.78, 57.785, 58.445, 59.0547826086957, 58.7675, 60.3575, 
58.835, 58.625, 58.16, 57.77, 57.875, 58.52, 57.935, 57.29, 56.915, 
56.78, 57.785, 58.445, 59.0547826086957, 58.7675, 60.3575, 59.2025, 
58.625, 57.77, 58.2125, 57.875, 58.52, 57.935, 57.29, 56.915, 
56.78, 57.785, 58.445, 59.0547826086957, 59.405, 59.405, 57.875, 
58.52, 57.935, 57.29, 56.915, 59.0547826086957, 53.8475, 53.7275, 
54.3875, 55.0775, 56.405, 55.475, 55.958, 57.2225, 56.78, 57.785, 
58.445, 59.2025, 47.7875, 52.67, 54.755, 54.3875, 55.0775, 56.405, 
55.475, 55.958, 59.405, 58.2125, 57.935, 57.29, 56.915, 58.445, 
59.0547826086957, 59.405, 58.52, 57.935, 57.29, 56.915), rainfall_inch = c(0.0393700787401575, 
0, 0.031496062992126, 1.68897637795276, 0.570866141732283, 1.54330708661417, 
5.36220472440945, 0, 1.8503937007874, 4.09055118110236, 2.85826771653543, 
0.110236220472441, 1.44094488188976, 1.49212598425197, 0.0118110236220472, 
0, 0, 0.0708661417322835, 0, 0.570866141732283, 3.1496062992126, 
2.85826771653543, 1.94881889763779, 0.570866141732283, 0, 1.8503937007874, 
0.110236220472441, 1.44094488188976, 1.49212598425197, 0.0118110236220472, 
0, 0, 0.0708661417322835, 0, 0.551181102362205, 3.1496062992126, 
1.94881889763779, 1.62204724409449, 0.570866141732283, 0, 1.8503937007874, 
0.110236220472441, 1.44094488188976, 1.49212598425197, 0.0118110236220472, 
0, 0, 0.031496062992126, 1.68897637795276, 0.570866141732283, 
0, 1.8503937007874, 0.110236220472441, 1.44094488188976, 0, 0.598425196850394, 
0.811023622047244, 0.0984251968503937, 0, 0, 0.0590551181102362, 
1.08267716535433, 0.299212598425197, 1.49212598425197, 0.0118110236220472, 
0, 0.551181102362205, 1.5, 0.389763779527559, 0, 0.0984251968503937, 
0, 0, 0.0590551181102362, 1.08267716535433, 0.031496062992126, 
1.62204724409449, 1.8503937007874, 0.110236220472441, 1.44094488188976, 
0, 0, 1.68897637795276, 0, 1.8503937007874, 0.110236220472441, 
1.44094488188976)), row.names = c(NA, -87L), class = c("tbl_df", 
"tbl", "data.frame"))

# A tibble: 6 x 8
   year period district date       pinkcount airtemp_f watertemp_f rainfall_inch
  <dbl>  <dbl>    <dbl> <date>         <dbl>     <dbl>       <dbl>         <dbl>
1  2020      1      221 2020-07-11   2153732      56.7        58.2        0.0394
2  2020      2      221 2020-07-15   2074853      57.7        58.3        0     
3  2020      3      221 2020-07-16    526934      57.2        59.4        0.0315
4  2020      4      221 2020-07-19    721057      57.0        59.4        1.69  
5  2020      5      221 2020-07-23    728884      56.5        57.9        0.571 
6  2020      6      221 2020-07-24    116370      56.8        58.3        1.54 


 # Fit a GAM of pinkcount on one smooth term per weather covariate.
 mod1 <- gam(
   pinkcount ~ s(airtemp_f) + s(watertemp_f) + s(rainfall_inch),
   data = test
 )
 # Augment the fitted model with no extra arguments, then print the result.
 aug <- mod1 %>% augment()
 aug

After 'augment' I lost year, district, period, and date.

# A tibble: 6 x 10
  pinkcount airtemp_f watertemp_f rainfall_inch .fitted .se.fit   .resid   .hat .sigma  .cooksd
      <dbl>     <dbl>       <dbl>         <dbl>   <dbl>   <dbl>    <dbl>  <dbl> <lgl>     <dbl>
1   2153732      56.7        58.2        0.0394 295858.  64667. 1857874. 0.0296 NA     0.137   
2   2074853      57.7        58.3        0      280398.  59418. 1794455. 0.0250 NA     0.107   
3    526934      57.2        59.4        0.0315 256397.  88107.  270537. 0.0550 NA     0.00567 
4    721057      57.0        59.4        1.69   182417.  90747.  538640. 0.0584 NA     0.0240  
5    728884      56.5        57.9        0.571  270947.  53998.  457937. 0.0207 NA     0.00569 
6    116370      56.8        58.3        1.54   225246.  55145. -108876. 0.0216 NA     0.000336

有谁知道如何保留这些原始列并将它们附加到 'aug' 上?

原始数据集的行数与 aug 相同,为什么不直接把它们按列绑定在一起呢?

library(dplyr)

# Take the four columns that are absent from `aug` and put them in front of
# the augmented output; bind_cols() pairs the rows by position.
bind_cols(
  select(test, year, district, period, date),
  aug
)

#    year district period date       pinkcount airtemp_f watertemp_f rainfall_inch .fitted
#   <dbl>    <dbl>  <dbl> <date>         <dbl>     <dbl>       <dbl>         <dbl>   <dbl>
# 1  2020      221      1 2020-07-11   2153732      56.7        58.2        0.0394 295858.
# 2  2020      221      2 2020-07-15   2074853      57.7        58.3        0      280398.
# 3  2020      221      3 2020-07-16    526934      57.2        59.4        0.0315 256397.
# 4  2020      221      4 2020-07-19    721057      57.0        59.4        1.69   182417.
# 5  2020      221      5 2020-07-23    728884      56.5        57.9        0.571  270947.
# 6  2020      221      6 2020-07-24    116370      56.8        58.3        1.54   225246.
# 7  2020      221      7 2020-07-25    231951      54.9        57.4        5.36    57257.
# 8  2020      221      8 2020-07-28    750551      57.6        58.5        0      280472.
# 9  2020      221      9 2020-08-01    803772      58.2        57.9        1.85   186288.
#10  2020      221     10 2020-08-02     76330      57.3        58.0        4.09    98621.
# … with 77 more rows, and 5 more variables: .se.fit <dbl>, .resid <dbl>, .hat <dbl>,
#   .sigma <lgl>, .cooksd <dbl>