使用 dplyr 等拟合许多 GAM 并提取导数

Fitting many GAMs and extracting derivatives using dplyr, etc

我需要将 GAM 与北美超过 1000 个鸟巢的 satellite-derived 物候数据 (EVI) 的时间序列相匹配。 GAM 看起来像这样:

EVI ~ s(天)

此模型需要适合多年的每个巢穴。拟合GAM后,我需要提取导数,可以用来获取每个巢每年spring开始的那一天。

理想情况下,我希望使用 tidyverse 和相关包来适应每个巢和年份的 GAM。然后我想获得每个模型的一阶导数。因为它是一个大数据集(>1000 个模型),所以对每个模型手动执行此操作是不可行的。

这是我的代码目前的样子:

图书馆:

library(mgcv)                         # fit the GAM
#devtools::install_github("gavinsimpson/tsgam") # to get the tsgam package
library(tsgam)                        # fderiv function
library(broom)
library(purrr)
library(dplyr)

数据:

EVI_nest_data <- structure(list(NestID = c(29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L), EVI = c(0.138169287379443, 0.130137123711153, 
    -0.0601195009642549, -0.0779132303669085, 0.119007158525673, 
    0.117033941141526, 0.136978681251301, 0.158265630276123, 0.184911987542837, 
    0.231065902413635, 0.235827030242373, 0.251683925119691, 0.265863073211945, 
    0.277412608027855, 0.345398904303846, 0.317428566021391, 0.320655922665656, 
    0.365148248230907, 0.399166432212128, 0.395530495249691, 0.408555574078434, 
    0.435927001361042, 0.457988839852063, 0.471232773544166, 0.58247300221377, 
    0.605087946423414, 0.544064641351546, 0.500725747018993, 0.515694326374929, 
    0.485923371886834, 0.38912851503709, 0.31623640671448, 0.28636661712885, 
    0.271462878408314, 0.23912601163518, 0.197224334805013, 0.167377596227415, 
    0.170303445041157, 0.162775515630323, 0.152289159775277, 0.146143190541624, 
    0.143272897184163, 0.0170566259267385, -0.0873424819202047, -0.144196012046888, 
    -0.11795840588453, -0.0437522532589144, -0.118758217275182, -0.0788013486245648, 
    -0.0434666293012519, -0.0889717254033134, -0.109810295597509, 
    -0.0979433930893343, -0.0695014701966185, -0.0727941935592107, 
    0.108063747925198, 0.141012203997097, 0.159793182262394, 0.185251199038151, 
    0.23902484552767, 0.262370361663236, 0.293599986569407, 0.246477967880569, 
    0.296704046172469, 0.364832264966032, 0.420785721438129, 0.515649782502755, 
    0.593698231711218, 0.650644713161621, 0.674706677006765, 0.729889909980883, 
    0.671085933201844, 0.662229259430051, 0.643746778462595, 0.563656149219038, 
    0.536507484614384, 0.428664251460763, 0.424638792032928, 0.362848516559366, 
    0.324369518951119, 0.274042768500715, 0.247502038808332, 0.226428461172987, 
    0.202030863129274, 0.194043008509045, 0.187428703967213, 0.186350861970308, 
    -0.0928074188360373, -0.103099561812049, -0.0950087816476854, 
    -0.127951240771348, -0.0731964425185014, -0.0735953543718254, 
    -0.0652932476052561, -0.0964820023201231, -0.128456548030901, 
    -0.0850037495489492, -0.0816757287942533, 0.124053320004109, 
    0.124802331049871, 0.128468679656191, 0.155367754190268, 0.172595885074457, 
    0.205172943223806, 0.23799608373125, 0.295842860300138, 0.266192953182183, 
    0.270199769824946, 0.325456419714002, 0.418809304967894, 0.506234342342094, 
    0.542166135383171, 0.58649224669783, 0.658174584259303, 0.661557382455075, 
    0.654039879899812, 0.693085389030335, 0.696299881716902, 0.651141604789385, 
    0.625163715612093, 0.59434641227456, 0.503194751705344, 0.443790310397243, 
    0.303605088370522, 0.296456522112772, 0.264598759432775, 0.256767949136579, 
    -0.00863667018730914, 0.148214595688042, 0.148114025527331, 0.10695720310348, 
    0.10432304150008, 0.0102107446971017, 0.0209462108119467, 0.0558393718493003, 
    0.0796985741519552, 0.128408104926543, 0.130348038590941, 0.136938002126515, 
    0.141133158619995, 0.153341970989871, 0.185664318459177, 0.23382350716503, 
    0.245722848447965, 0.280725297320107, 0.286714030274508, 0.276601788341198, 
    0.28869983778412, 0.429617203217443, 0.502815791196062, 0.543678558487744, 
    0.682038767726764, 0.729445506959948, 0.706995443722333, 0.670683864668805, 
    0.669971709049895, 0.660004448233754, 0.640115328437238, 0.626254974431609, 
    0.579810816037866, 0.515951459251395, 0.464542503193149, 0.339051097543664, 
    0.267999691958749, 0.245398744344898, 0.240586210852127, 0.215160040958741, 
    0.212985817402799, 0.205421003291693, 0.181380695803078, 0.175834408563713, 
    0.170302397059354, 0.164467173904389, -0.109744618573805, -0.0733910154523134, 
    -0.0112372603199326, -0.0116968253768986, -0.0389331550649953, 
    -0.17088254743052, -0.086528257002022, -0.0733364171794435, 0.130873576637048, 
    0.130287104658728, 0.156918320783852, 0.159137902001007, 0.177169191439639, 
    0.189077564769396, 0.186854990601556, 0.215065204884061, 0.224908816053833, 
    0.217477256913988, 0.207297170786867, 0.303227920672108, 0.381783895956506, 
    0.464210614456201, 0.576500769382467, 0.670887856300319, 0.755035573903038, 
    0.764883426467366, 0.743177264884881, 0.737057237403139, 0.700476098338919, 
    0.700830749184085, 0.662841978283046, 0.595808383583464, 0.493852987369538, 
    0.410896633943903, 0.303427604351243, 0.264082714034867, 0.225571394900212, 
    0.196407407134823, 0.194864945007588, 0.189049017546347, 0.192064052939134, 
    0.205793750782993, 0.127459080662136, 0.0228394733374728, 0.000814098239512075, 
    -0.00703034480699902, 0.026508315098859), Day = c(1L, 9L, 17L, 
    25L, 41L, 49L, 57L, 65L, 73L, 81L, 89L, 97L, 105L, 113L, 121L, 
    129L, 137L, 145L, 153L, 161L, 169L, 177L, 185L, 193L, 201L, 209L, 
    217L, 225L, 233L, 241L, 249L, 257L, 265L, 273L, 281L, 289L, 297L, 
    305L, 313L, 321L, 329L, 337L, 345L, 353L, 361L, 1L, 9L, 25L, 
    33L, 41L, 49L, 57L, 65L, 73L, 81L, 89L, 97L, 105L, 113L, 121L, 
    129L, 137L, 145L, 153L, 161L, 169L, 177L, 185L, 193L, 201L, 209L, 
    217L, 225L, 233L, 241L, 249L, 257L, 265L, 273L, 281L, 289L, 297L, 
    305L, 313L, 321L, 329L, 337L, 353L, 361L, 1L, 9L, 17L, 25L, 33L, 
    41L, 49L, 57L, 65L, 73L, 81L, 89L, 97L, 105L, 113L, 121L, 129L, 
    137L, 145L, 153L, 161L, 169L, 177L, 185L, 193L, 201L, 209L, 217L, 
    225L, 233L, 241L, 249L, 257L, 265L, 273L, 281L, 289L, 297L, 321L, 
    353L, 361L, 1L, 9L, 25L, 33L, 41L, 49L, 57L, 65L, 73L, 81L, 89L, 
    97L, 105L, 113L, 121L, 129L, 137L, 145L, 153L, 161L, 169L, 177L, 
    185L, 193L, 201L, 209L, 217L, 225L, 233L, 241L, 249L, 257L, 265L, 
    273L, 281L, 289L, 297L, 305L, 313L, 321L, 329L, 337L, 345L, 353L, 
    361L, 1L, 9L, 17L, 25L, 33L, 41L, 57L, 65L, 73L, 81L, 89L, 97L, 
    105L, 113L, 121L, 129L, 137L, 145L, 153L, 161L, 169L, 177L, 185L, 
    193L, 201L, 209L, 217L, 225L, 233L, 241L, 249L, 257L, 265L, 273L, 
    281L, 289L, 297L, 305L, 313L, 321L, 329L, 337L, 345L, 353L, 361L
    ), Year = c(2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 
    2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 
    2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 
    2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 
    2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 
    2012L, 2012L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
    2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
    2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
    2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
    2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
    2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
    2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
    2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
    2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
    2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 
    2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
    2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
    2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
    2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
    2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 
    2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
    2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
    2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
    2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
    2016L, 2016L, 2016L, 2016L, 2016L, 2016L)), class = c("tbl_df", 
    "tbl", "data.frame"), row.names = c(NA, -220L), .Names = c("NestID", 
    "EVI", "Day", "Year"))

这里我按每个 NestID 和 Year 组合分组,并使用 'do' 使 GAM 适合每个。结果是一个包含每个模型的新列 'mod' 的标题:

by_nest_year <- group_by(EVI_nest_data, NestID, Year)

models <- by_nest_year %>% 
  do(mod = gam(EVI ~ Day, data = .)) 
models

这是我不确定如何处理的部分。我创建了一个用于预测目的的新数据框 (newd),然后该数据框和模型可用于获取导数。我想知道如何使用我在上面创建的模型数据集来完成,也许是通过向其中添加几行代码?我是否会为每个 NestID/Year 创建另一个包含 newd 的列,然后在另一列中计算导数?

# Code adapted from the site below:
#https://www.fromthebottomoftheheap.net/2017/03/21/simultaneous-intervals-for-derivatives-of-smooths/
UNCONDITIONAL <- FALSE # unconditional or conditional on estimating smooth params?
EPS <- 1e-07           # finite difference

# create a new df for prediction and fit the gam for 2012 data:
newd <- with(EVI_nest_data %>% filter(Year==2012), data.frame(Day = seq(1:365)))
m <- gam(EVI ~ s(Day), data = EVI_nest_data)

# extracting derivatives:
fd <- fderiv(m, newdata = newd, eps = EPS, unconditional = UNCONDITIONAL)
fd$deriv$Day$deriv
plot(fd$deriv$Day$deriv)

如果我没理解错的话,你可以使用这个解决方案:

library(mgcv)                  
library(tsgam) 
library(tidyverse)  


my_prediction <- function(data,eps = EPS,unconditional = UNCONDITIONAL){
  mod = gam(EVI ~ s(Day), data = data)
  days <- tibble(Day = c(1:365))
  fd <- fderiv(mod,newdata = days, eps = eps, unconditional = unconditional)
  tibble(Day = days$Day,pred = as.numeric(fd$deriv$Day$deriv))
}
UNCONDITIONAL <- FALSE # unconditional or conditional on estimating smooth params?
EPS <- 1e-07           # finite difference
df_model <- EVI_nest_data %>% 
  nest(-c(NestID,Year)) %>% 
  mutate(pred = map(data,my_prediction))
df_model %>% 
  unnest(pred) %>% 
  ggplot(aes(Day,pred,col = as.factor(Year))) + geom_line() +
  theme_bw() +
  facet_wrap(c('NestID'))