R: minpack.lm::nls.lm 失败但结果不错

R: minpack.lm::nls.lm failed with good results

我使用 minpack.lm 包中的 nls.lm 来拟合许多非线性模型。

由于初始参数估计处的奇异梯度矩阵,它经常在 20 次迭代后失败。

问题是当我查看失败前的迭代时 (trace = T) 我可以看到结果是好的。

可复制示例:

数据:

# Reproducible data (dput output): 280 rows with integer predictors x1, x2,
# numeric predictor x3 (values 0-3), response y; row.names kept from the
# original sample.
df <- structure(list(x1 = c(7L, 5L, 10L, 6L, 9L, 10L, 2L, 4L, 9L, 3L, 
11L, 6L, 4L, 0L, 7L, 12L, 9L, 11L, 11L, 0L, 2L, 3L, 5L, 6L, 6L, 
9L, 1L, 7L, 7L, 4L, 3L, 13L, 12L, 13L, 5L, 0L, 5L, 6L, 6L, 7L, 
5L, 10L, 6L, 10L, 0L, 7L, 9L, 12L, 4L, 5L, 6L, 3L, 4L, 5L, 5L, 
0L, 9L, 9L, 1L, 2L, 2L, 13L, 8L, 2L, 5L, 10L, 6L, 11L, 5L, 0L, 
4L, 4L, 8L, 9L, 4L, 2L, 12L, 4L, 10L, 7L, 0L, 4L, 4L, 5L, 8L, 
8L, 12L, 4L, 6L, 13L, 5L, 12L, 1L, 6L, 4L, 9L, 11L, 11L, 6L, 
10L, 10L, 0L, 3L, 1L, 11L, 4L, 3L, 13L, 5L, 4L, 2L, 3L, 11L, 
7L, 0L, 9L, 6L, 11L, 6L, 13L, 1L, 5L, 0L, 6L, 4L, 8L, 2L, 3L, 
7L, 9L, 12L, 11L, 7L, 4L, 10L, 0L, 6L, 1L, 7L, 2L, 6L, 3L, 1L, 
6L, 10L, 12L, 7L, 7L, 6L, 6L, 1L, 7L, 8L, 7L, 7L, 5L, 7L, 10L, 
10L, 11L, 7L, 1L, 8L, 3L, 12L, 0L, 11L, 8L, 5L, 0L, 6L, 3L, 2L, 
2L, 8L, 9L, 2L, 8L, 2L, 13L, 10L, 2L, 12L, 6L, 13L, 2L, 11L, 
1L, 12L, 6L, 7L, 9L, 8L, 10L, 2L, 6L, 0L, 2L, 11L, 2L, 3L, 9L, 
12L, 1L, 11L, 11L, 12L, 4L, 6L, 9L, 1L, 4L, 1L, 8L, 8L, 6L, 1L, 
9L, 8L, 2L, 10L, 10L, 1L, 2L, 0L, 11L, 6L, 6L, 0L, 4L, 13L, 4L, 
8L, 4L, 10L, 9L, 6L, 11L, 8L, 1L, 6L, 5L, 10L, 8L, 10L, 8L, 0L, 
3L, 0L, 6L, 7L, 4L, 3L, 7L, 7L, 8L, 6L, 2L, 9L, 5L, 7L, 7L, 0L, 
7L, 2L, 5L, 5L, 7L, 5L, 7L, 8L, 6L, 1L, 2L, 6L, 0L, 8L, 10L, 
0L, 10L), x2 = c(4L, 6L, 1L, 5L, 4L, 1L, 8L, 9L, 4L, 7L, 2L, 
6L, 9L, 11L, 5L, 1L, 3L, 2L, 2L, 12L, 8L, 9L, 6L, 4L, 4L, 2L, 
9L, 6L, 6L, 6L, 8L, 0L, 0L, 0L, 8L, 10L, 7L, 7L, 4L, 5L, 5L, 
3L, 6L, 3L, 12L, 6L, 1L, 0L, 8L, 6L, 6L, 7L, 8L, 5L, 8L, 11L, 
3L, 2L, 12L, 11L, 10L, 0L, 2L, 8L, 8L, 3L, 7L, 2L, 7L, 10L, 7L, 
8L, 2L, 4L, 7L, 11L, 1L, 8L, 2L, 5L, 11L, 9L, 7L, 5L, 5L, 3L, 
1L, 8L, 4L, 0L, 5L, 0L, 12L, 5L, 9L, 1L, 2L, 0L, 5L, 0L, 2L, 
10L, 9L, 10L, 0L, 8L, 10L, 0L, 6L, 8L, 8L, 7L, 1L, 6L, 10L, 1L, 
5L, 1L, 6L, 0L, 12L, 7L, 13L, 6L, 9L, 2L, 11L, 10L, 5L, 2L, 0L, 
2L, 5L, 6L, 2L, 10L, 4L, 10L, 4L, 9L, 5L, 9L, 11L, 4L, 3L, 1L, 
6L, 3L, 7L, 7L, 10L, 3L, 3L, 6L, 3L, 7L, 4L, 1L, 0L, 1L, 4L, 
11L, 4L, 10L, 0L, 11L, 0L, 3L, 5L, 11L, 5L, 8L, 10L, 9L, 4L, 
3L, 10L, 4L, 10L, 0L, 3L, 9L, 1L, 7L, 0L, 8L, 1L, 11L, 0L, 5L, 
4L, 2L, 2L, 0L, 11L, 6L, 13L, 9L, 1L, 9L, 7L, 3L, 1L, 12L, 2L, 
2L, 1L, 6L, 4L, 2L, 10L, 6L, 10L, 2L, 3L, 4L, 9L, 2L, 5L, 10L, 
0L, 0L, 10L, 9L, 12L, 0L, 7L, 5L, 10L, 6L, 0L, 9L, 4L, 8L, 1L, 
3L, 5L, 2L, 4L, 12L, 4L, 5L, 2L, 5L, 0L, 2L, 10L, 8L, 10L, 7L, 
3L, 8L, 8L, 6L, 3L, 5L, 6L, 11L, 4L, 5L, 4L, 3L, 10L, 6L, 8L, 
6L, 7L, 4L, 8L, 5L, 3L, 7L, 12L, 8L, 4L, 11L, 2L, 3L, 12L, 1L
), x3 = c(1, 1, 1, 1, 3, 1, 0, 3, 3, 0, 3, 2, 3, 1, 2, 3, 2, 
3, 3, 2, 0, 2, 1, 0, 0, 1, 0, 3, 3, 0, 1, 3, 2, 3, 3, 0, 2, 3, 
0, 2, 0, 3, 2, 3, 2, 3, 0, 2, 2, 1, 2, 0, 2, 0, 3, 1, 2, 1, 3, 
3, 2, 3, 0, 0, 3, 3, 3, 3, 2, 0, 1, 2, 0, 3, 1, 3, 3, 2, 2, 2, 
1, 3, 1, 0, 3, 1, 3, 2, 0, 3, 0, 2, 3, 1, 3, 0, 3, 1, 1, 0, 2, 
0, 2, 1, 1, 2, 3, 3, 1, 2, 0, 0, 2, 3, 0, 0, 1, 2, 2, 3, 3, 2, 
3, 2, 3, 0, 3, 3, 2, 1, 2, 3, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 2, 
0, 3, 3, 3, 0, 3, 3, 1, 0, 1, 3, 0, 2, 1, 1, 0, 2, 1, 2, 2, 3, 
2, 1, 1, 1, 0, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 3, 3, 1, 3, 3, 3, 
0, 2, 2, 2, 1, 1, 1, 0, 0, 3, 2, 3, 1, 2, 1, 0, 2, 3, 3, 3, 3, 
3, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 3, 2, 0, 0, 1, 1, 2, 1, 3, 
1, 0, 0, 3, 3, 2, 2, 1, 2, 1, 3, 2, 3, 0, 0, 2, 3, 0, 0, 0, 1, 
0, 3, 0, 2, 1, 3, 0, 3, 2, 3, 3, 0, 1, 0, 0, 3, 0, 1, 2, 1, 3, 
2, 1, 3, 3, 0, 0, 1, 0, 3, 2, 1), y = c(0.03688, 0.09105, 0.16246, 
0, 0.11024, 0.16246, 0.13467, 0, 0.11024, 0.0807, 0.12726, 0.03934, 
0, 0.0826, 0.03688, 0.06931, 0.1378, 0.12726, 0.12726, 0.08815, 
0.13467, 0.01314, 0.09105, 0.12077, 0.12077, 0.02821, 0.15134, 
0.03604, 0.03604, 0.08729, 0.04035, 0.46088, 0.20987, 0.46088, 
0.06672, 0.24121, 0.08948, 0.07867, 0.12077, 0.03688, 0.02276, 
0.04535, 0.03934, 0.04535, 0.08815, 0.03604, 0.50771, 0.20987, 
0.08569, 0.09105, 0.03934, 0.0807, 0.08569, 0.02276, 0.06672, 
0.0826, 0.1378, 0.02821, 0.03943, 0.03589, 0.04813, 0.46088, 
0.22346, 0.13467, 0.06672, 0.04535, 0.07867, 0.12726, 0.08948, 
0.24121, 0.06983, 0.08569, 0.22346, 0.11024, 0.06983, 0.03589, 
0.06931, 0.08569, 0.04589, 0.03688, 0.0826, 0, 0.06983, 0.02276, 
0.06238, 0.03192, 0.06931, 0.08569, 0.12077, 0.46088, 0.02276, 
0.20987, 0.03943, 0, 0, 0.50771, 0.12726, 0.1628, 0, 0.41776, 
0.04589, 0.24121, 0.01314, 0.03027, 0.1628, 0.08569, 0, 0.46088, 
0.09105, 0.08569, 0.13467, 0.0807, 0.12912, 0.03604, 0.24121, 
0.50771, 0, 0.12912, 0.03934, 0.46088, 0.03943, 0.08948, 0.07103, 
0.03934, 0, 0.22346, 0.03589, 0, 0.03688, 0.02821, 0.20987, 0.12726, 
0.03688, 0.08729, 0.04589, 0.24121, 0.12077, 0.03027, 0.03688, 
0.03673, 0, 0.01314, 0.02957, 0.12077, 0.04535, 0.06931, 0.03604, 
0.36883, 0.07867, 0.07867, 0.03027, 0.36883, 0.03192, 0.03604, 
0.36883, 0.08948, 0.03688, 0.16246, 0.41776, 0.12912, 0.03688, 
0.02957, 0.1255, 0, 0.20987, 0.0826, 0.1628, 0.03192, 0.02276, 
0.0826, 0, 0.04035, 0.04813, 0.03673, 0.1255, 0.1378, 0.04813, 
0.1255, 0.04813, 0.46088, 0.04535, 0.03673, 0.06931, 0.07867, 
0.46088, 0.13467, 0.12912, 0.02957, 0.20987, 0, 0.03688, 0.02821, 
0.22346, 0.41776, 0.03589, 0.03934, 0.07103, 0.03673, 0.12912, 
0.03673, 0.0807, 0.1378, 0.06931, 0.03943, 0.12726, 0.12726, 
0.06931, 0.08729, 0.12077, 0.02821, 0.03027, 0.08729, 0.03027, 
0.22346, 0.03192, 0.12077, 0.15134, 0.02821, 0.06238, 0.04813, 
0.41776, 0.41776, 0.03027, 0.03673, 0.08815, 0.1628, 0.07867, 
0, 0.24121, 0.08729, 0.46088, 0, 0.1255, 0.08569, 0.16246, 0.1378, 
0, 0.12726, 0.1255, 0.03943, 0.12077, 0.02276, 0.04589, 0.06238, 
0.41776, 0.22346, 0.24121, 0.04035, 0.24121, 0.07867, 0.36883, 
0.08569, 0.04035, 0.03604, 0.36883, 0.06238, 0.03934, 0.03589, 
0.11024, 0.02276, 0.03688, 0.36883, 0.24121, 0.03604, 0.13467, 
0.09105, 0.08948, 0.03688, 0.06672, 0.03688, 0.03192, 0.07867, 
0.03943, 0.13467, 0.12077, 0.0826, 0.22346, 0.04535, 0.08815, 
0.16246)), .Names = c("x1", "x2", "x3", "y"), row.names = c(995L, 
1416L, 281L, 1192L, 1075L, 294L, 1812L, 2235L, 1097L, 1583L, 
670L, 1485L, 2199L, 2495L, 1259L, 436L, 803L, 631L, 617L, 2654L, 
1813L, 2180L, 1403L, 911L, 927L, 533L, 2024L, 1517L, 1522L, 1356L, 
1850L, 222L, 115L, 204L, 1974L, 2292L, 1695L, 1746L, 915L, 1283L, 
1128L, 880L, 1467L, 887L, 2665L, 1532L, 267L, 155L, 1933L, 1447L, 
1488L, 1609L, 1922L, 1168L, 1965L, 2479L, 813L, 550L, 2707L, 
2590L, 2373L, 190L, 504L, 1810L, 2007L, 843L, 1770L, 659L, 1730L, 
2246L, 1668L, 1923L, 465L, 1108L, 1663L, 2616L, 409L, 1946L, 
589L, 1277L, 2493L, 2210L, 1662L, 1142L, 1331L, 735L, 430L, 1916L, 
922L, 208L, 1134L, 127L, 2693L, 1213L, 2236L, 240L, 623L, 108L, 
1190L, 9L, 575L, 2268L, 2171L, 2308L, 103L, 1953L, 2409L, 184L, 
1437L, 1947L, 1847L, 1570L, 365L, 1550L, 2278L, 270L, 1204L, 
384L, 1472L, 205L, 2694L, 1727L, 2800L, 1476L, 2229L, 453L, 2630L, 
2426L, 1275L, 523L, 163L, 635L, 1287L, 1349L, 561L, 2261L, 931L, 
2339L, 973L, 2113L, 1229L, 2155L, 2554L, 936L, 892L, 433L, 1560L, 
697L, 1791L, 1755L, 2351L, 720L, 740L, 1558L, 674L, 1736L, 988L, 
321L, 18L, 375L, 959L, 2560L, 1047L, 2429L, 119L, 2468L, 98L, 
773L, 1158L, 2520L, 1216L, 1872L, 2364L, 2094L, 1035L, 826L, 
2374L, 1028L, 2368L, 176L, 895L, 2090L, 399L, 1789L, 179L, 1800L, 
369L, 2568L, 140L, 1207L, 1001L, 518L, 481L, 12L, 2597L, 1474L, 
2749L, 2097L, 379L, 2110L, 1615L, 800L, 423L, 2733L, 626L, 662L, 
421L, 1363L, 898L, 530L, 2315L, 1365L, 2331L, 468L, 768L, 900L, 
2027L, 544L, 1337L, 2376L, 53L, 44L, 2338L, 2075L, 2655L, 78L, 
1782L, 1231L, 2291L, 1379L, 212L, 2212L, 1032L, 1929L, 331L, 
790L, 1226L, 664L, 1018L, 2735L, 916L, 1157L, 590L, 1343L, 7L, 
490L, 2257L, 1853L, 2251L, 1748L, 719L, 1941L, 1885L, 1544L, 
725L, 1294L, 1494L, 2601L, 1077L, 1169L, 979L, 709L, 2282L, 1526L, 
1797L, 1424L, 1690L, 993L, 1979L, 1268L, 730L, 1739L, 2697L, 
1842L, 952L, 2483L, 479L, 864L, 2677L, 283L), class = "data.frame")

起始值

# Starting values for the 8 model parameters: fast/slow exponential terms in
# x2, plus the (1 + initial * exp(-decay * x)) factors for x1 and x3.
starting_value <- setNames(
  c(0.177698291502873, 0.6, 0.0761564106440883, 0.05, 1.9, 1.1,
    0.877181493020499, 1.9),
  c("F_initial_x2", "F_decay_x2", "S_initial_x2", "S_decay_x2",
    "initial_x1", "decay_x1", "initial_x3", "decay_x3")
)

NLSLM 失败

# Double-exponential decay in x2, modulated by exponential factors in x1 and
# x3; fails with "singular gradient matrix" after iteration 17.
# Fix: `coef(brute_force)` referenced an object not defined in this
# reproducible example — the starting values are `starting_value` (the It. 0
# parameters in the trace below match it). Also use TRUE instead of T.
coef(nlsLM( 
  formula   = y ~ (F_initial_x2   * exp(- F_decay_x2  * x2) + S_initial_x2 * exp(- S_decay_x2 * x2)) *
    (1 + initial_x1      * exp(- decay_x1      * x1)) *
    (1 + initial_x3      * exp(- decay_x3      * x3 )),
  data     = df,
  start    = starting_value,
  lower    = c(0, 0, 0, 0, 0, 0, 0, 0),
  control  = nls.lm.control(maxiter = 200),
  trace    = TRUE))

It.    0, RSS =    1.36145, Par. =   0.177698        0.6  0.0761564       0.05        1.9        1.1   0.877181        1.9
It.    1, RSS =    1.25401, Par. =   0.207931   0.581039  0.0769047  0.0577244    2.01947    1.22911   0.772957    5.67978
It.    2, RSS =    1.19703, Par. =   0.188978   0.604515  0.0722749  0.0792141    2.44179     1.1258    0.96305    8.67253
It.    3, RSS =     1.1969, Par. =   0.160885   0.640958  0.0990201   0.145187     3.5853   0.847158   0.961844    13.2183
It.    4, RSS =    1.19057, Par. =   0.142138   0.685678    0.11792   0.167417    4.27977   0.936981   0.959606    13.2644
It.    5, RSS =    1.19008, Par. =   0.124264   0.757088   0.136277   0.188896    4.76578    0.91274   0.955142    21.0167
It.    6, RSS =    1.18989, Par. =   0.118904   0.798296   0.141951   0.194167    4.93099    0.91529   0.952972     38.563
It.    7, RSS =    1.18987, Par. =   0.115771   0.821874   0.145398   0.197773    5.02251   0.914204   0.949906     38.563
It.    8, RSS =    1.18986, Par. =   0.113793   0.837804   0.147573   0.199943    5.07456   0.914192   0.948289     38.563
It.    9, RSS =    1.18986, Par. =   0.112458   0.848666   0.149033   0.201406    5.11024   0.914099   0.947232     38.563
It.   10, RSS =    1.18986, Par. =   0.111538   0.856282   0.150035   0.202411    5.13491   0.914051   0.946546     38.563
It.   11, RSS =    1.18986, Par. =   0.110889   0.861702    0.15074   0.203118    5.15244   0.914013   0.946076     38.563
It.   12, RSS =    1.18986, Par. =   0.110426   0.865606   0.151243   0.203623    5.16501   0.913986   0.945747     38.563
It.   13, RSS =    1.18986, Par. =   0.110092   0.868441   0.151605   0.203986    5.17412   0.913966   0.945512     38.563
It.   14, RSS =    1.18986, Par. =   0.109849    0.87051   0.151868    0.20425    5.18075   0.913952   0.945343     38.563
It.   15, RSS =    1.18985, Par. =   0.109672   0.872029    0.15206   0.204443    5.18561   0.913941    0.94522     38.563
It.   16, RSS =    1.18985, Par. =   0.109542   0.873147   0.152201   0.204585    5.18918   0.913933   0.945131     38.563
It.   17, RSS =    1.18985, Par. =   0.109446   0.873971   0.152305   0.204689    5.19181   0.913927   0.945065     38.563
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates

问题:

  1. 使用在奇异梯度矩阵问题之前找到的最佳参数是否有意义,即在 Iteration = 17 时找到的参数?

  2. 如果是,有没有办法获取它们?发生错误时我没有成功保存结果

  3. 我注意到,如果我将 maxiter 的数量减少到 17 以下,我仍然会遇到在新的最后一次迭代中出现的相同错误,这对我来说没有意义

例如 maxiter = 10

It.    0, RSS =    1.36145, Par. =   0.177698        0.6  0.0761564       0.05        1.9        1.1   0.877181        1.9
It.    1, RSS =    1.25401, Par. =   0.207931   0.581039  0.0769047  0.0577244    2.01947    1.22911   0.772957    5.67978
It.    2, RSS =    1.19703, Par. =   0.188978   0.604515  0.0722749  0.0792141    2.44179     1.1258    0.96305    8.67253
It.    3, RSS =     1.1969, Par. =   0.160885   0.640958  0.0990201   0.145187     3.5853   0.847158   0.961844    13.2183
It.    4, RSS =    1.19057, Par. =   0.142138   0.685678    0.11792   0.167417    4.27977   0.936981   0.959606    13.2644
It.    5, RSS =    1.19008, Par. =   0.124264   0.757088   0.136277   0.188896    4.76578    0.91274   0.955142    21.0167
It.    6, RSS =    1.18989, Par. =   0.118904   0.798296   0.141951   0.194167    4.93099    0.91529   0.952972     38.563
It.    7, RSS =    1.18987, Par. =   0.115771   0.821874   0.145398   0.197773    5.02251   0.914204   0.949906     38.563
It.    8, RSS =    1.18986, Par. =   0.113793   0.837804   0.147573   0.199943    5.07456   0.914192   0.948289     38.563
It.    9, RSS =    1.18986, Par. =   0.112458   0.848666   0.149033   0.201406    5.11024   0.914099   0.947232     38.563
It.   10, RSS =    0.12289, Par. =   0.112458   0.848666   0.149033   0.201406    5.11024   0.914099   0.947232     38.563
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
In addition: Warning message:
In nls.lm(par = start, fn = FCT, jac = jac, control = control, lower = lower,  :
  lmdif: info = -1. Number of iterations has reached `maxiter' == 10.

你看到什么解释了吗?

经常发生这个错误,问题不是代码而是使用的模型。 singular gradient matrix at the initial parameter estimates 可能表示模型没有单一的唯一解,或者模型对于手头的数据指定过度。

回答您的问题:

  1. 是的,这是有道理的。函数 nlsLM 首先调用 nls.lm 进行迭代。当它到达迭代的末尾时(由于最佳拟合或因为 max.iter),结果将传递给函数 nlsModel。该函数对乘以平方权重的梯度矩阵进行 QR 分解。并且您的初始梯度矩阵包含一个只有零的列。因此,虽然 nls.lm 可以进行迭代,但只有在下一步 nlsModel 才真正检查并发现梯度矩阵的问题。

  2. 有一种方法,但这需要您更改 R 本身的选项,特别是 error 选项。通过将其设置为 dump.frames,您可以获得出错时存在的所有环境的转储。这些存储在名为 last.dump 的列表中,您可以使用这些环境来查找所需的值。

在这种情况下,参数由位于主力函数 nlsModel 环境中的函数 getPars() 返回:
# Temporarily set the error option to dump.frames so that, when nlsModel()
# errors, all frames alive at the time are saved and the parameters reached
# by nls.lm can be recovered afterwards.
old.opt <- options(error = dump.frames)

themod <- nlsLM( 
  formula   = y ~ (F_initial_x2   * exp(- F_decay_x2  * x2) + 
                     S_initial_x2 * exp(- S_decay_x2 * x2)) *
    (1 + initial_x1      * exp(- decay_x1      * x1)) *
    (1 + initial_x3      * exp(- decay_x3      * x3 )),
  data     = df,
  start    = starting_value,
  lower    = c(0, 0, 0, 0, 0, 0, 0, 0),
  control  = nls.lm.control(maxiter = 200),
  trace    = TRUE)

# The dump created by dump.frames is called `last.dump` (the original post
# had a typo, `llast.dump`). Pull the parameters out of the nlsModel frame
# via its getPars() closure.
thecoefs <- last.dump[["nlsModel(formula, mf, start, wts)"]]$getPars()
options(old.opt) # reset to the previous value.

请注意,这不是您想要在生产环境中使用或与同事共享的那种代码。而且它也不是你问题的解决方案,因为问题是模型,而不是代码。

  3. 这是我在 1 中解释的另一个结果。所以是的,这是合乎逻辑的。

我做了一个非常简短的测试,看看它是否真的是模型,如果我用它的起始值替换最后一个参数 (decay_x3),模型就可以毫无问题地拟合。我不知道我们在这里处理什么,所以在现实世界中删除另一个参数可能更有意义,但只是为了证明您的代码没问题:

# Same model but with decay_x3 frozen at its starting value (1.9), so the
# 8th parameter is dropped from both `start` and `lower`; this version fits
# without the singular-gradient error.
themod <- nlsLM( 
  formula   = y ~ (F_initial_x2   * exp(- F_decay_x2  * x2) + 
                     S_initial_x2 * exp(- S_decay_x2 * x2)) *
    (1 + initial_x1      * exp(- decay_x1      * x1)) *
    (1 + initial_x3      * exp(- 1.9* x3 )),
  data     = df,
  start    = starting_value[-8],
  lower    = c(0, 0, 0, 0, 0, 0, 0, 0)[-8],
  control  = nls.lm.control(maxiter = 200),
  trace    = TRUE)

在迭代 10 时无错误退出。


编辑: 我一直在深入研究它,根据数据,"extra" 解决方案基本上是将 x3 踢出模型。那里只有 3 个唯一值,参数的初始估计值约为 38。所以:

> exp(-38*c(1,2,3)) < .Machine$double.eps
[1] TRUE TRUE TRUE

如果将其与实际 Y 值进行比较,很明显 initial_x3 * exp(- decay_x3 * x3 ) 对模型没有任何贡献,因为它实际上为 0。

如果您像 nlsModel 中那样手动计算梯度,您会得到一个不是满秩的矩阵;最后一列仅包含 0 :

# Rebuild the evaluation environment (data columns + recovered coefficients)
# and compute the gradient numerically, mimicking what nlsModel() does.
# NOTE(review): `form` is not defined in this snippet — presumably the model
# formula, so form[[3]] is its right-hand side; confirm before running.
theenv <- list2env( c(df, thecoefs))
thederiv <- numericDeriv(form[[3]], names(starting_value), theenv)
# The resulting gradient matrix is rank-deficient: its last column is all 0.
thegrad <- attr(thederiv, "gradient")

这就是导致错误的原因。该模型对于您拥有的数据指定过多。

Gabor 建议的对数变换可防止您最后的估计值过大而迫使 x3 超出模型。由于对数变换,该算法不会轻易跳到这样的极值。为了获得与原始模型相同的估计值,他的 decay_x3 应该与 3.2e16 一样高以指定相同的模型 (exp(38))。因此,对数转换可以保护您免受将任何变量的影响强制为 0 的估计。

对数转换的另一个副作用是 decay_x3 值的大步长对模型的影响不大。 Gabor 发现的估计值已经是一个惊人的 1.3e7,但在反向转换之后对于 decay_x3 仍然是 16 的可行值。如果您看一下,这仍然会使模型中的 x3 变得多余:

> exp(-16*c(1,2,3))
[1] 1.125352e-07 1.266417e-14 1.425164e-21

但它不会导致导致您的错误的奇点。

您可以通过设置上限来避免这种情况,例如:

# Same model, but with every parameter bounded above by
# -log(sqrt(.Machine$double.eps)) (about 18.4), so exp(-decay * x) can never
# underflow to exactly zero — this avoids the singular gradient while
# reaching the same estimates.
themod <- nlsLM( 
  formula   = y ~ (F_initial_x2   * exp(- F_decay_x2  * x2) + 
                     S_initial_x2 * exp(- S_decay_x2 * x2)) *
    (1 + initial_x1      * exp(- decay_x1      * x1)) *
    (1 + initial_x3      * exp(- decay_x3      * x3 )),
  data     = df,
  start    = starting_value,
  lower    = c(0, 0, 0, 0, 0, 0, 0, 0),
  upper    = rep(- log(.Machine$double.eps^0.5),8),
  control  = nls.lm.control(maxiter = 200),
  trace    = TRUE)

运行得很好,给你相同的估计,并再次得出结论 x3 是多余的。

所以不管你怎么看,x3 对 y 没有影响,你的模型被过度指定并且不能很好地适应手头的数据。

问题的根本问题是没有实现收敛。这可以通过使用 Y = log(X+1) 转换衰减参数然后使用 X = exp(Y)-1 将它们转换回来来解决。这种转换可以有益地修改雅可比矩阵。不幸的是,这种转换的应用往往主要是反复试验。 (另见注释 1。)

# Reparameterize the decay parameters as log(par + 1) inside the formula;
# the starting values are mapped through exp(par) - 1 so the transformed fit
# begins at the same point as the untransformed one.
ix <- grep("decay", names(starting_value))
fm <- nlsLM( 
   formula   = y ~ (F_initial_x2   * exp(- log(F_decay_x2+1)  * x2) + 
                    S_initial_x2 * exp(- log(S_decay_x2+1) * x2)) *
                    (1 + initial_x1 * exp(- log(decay_x1+1) * x1)) *
                    (1 + initial_x3 * exp(- log(decay_x3+1) * x3 )),
   data     = df,
   start    = replace(starting_value, ix, exp(starting_value[ix]) - 1),
   lower    = c(0, 0, 0, 0, 0, 0, 0, 0),
   control  = nls.lm.control(maxiter = 200),
   trace    = TRUE)

给出类似的残差平方和但实现收敛:

> fm
Nonlinear regression model
  model: y ~ (F_initial_x2 * exp(-log(F_decay_x2 + 1) * x2) + S_initial_x2 *     exp(-log(S_decay_x2 + 1) * x2)) * (1 + initial_x1 * exp(-log(decay_x1 +     1) * x1)) * (1 + initial_x3 * exp(-log(decay_x3 + 1) * x3))
   data: df
F_initial_x2   F_decay_x2 S_initial_x2   S_decay_x2   initial_x1     decay_x1 
   1.092e-01    1.402e+00    1.526e-01    2.275e-01    5.199e+00    1.494e+00 
  initial_x3     decay_x3 
   9.449e-01    1.375e+07 
 residual sum-of-squares: 1.19

Number of iterations to convergence: 38 
Achieved convergence tolerance: 1.49e-08

> replace(coef(fm), ix, log(coef(fm)[ix]+1))
F_initial_x2   F_decay_x2 S_initial_x2   S_decay_x2   initial_x1     decay_x1 
   0.1091735    0.8763253    0.1525997    0.2049852    5.1993194    0.9139096 
  initial_x3     decay_x3 
   0.9448779   16.4368001 

注 1: 经过一些实验后,我发现只需将转换应用于 decay_x3

注 2: 关于您想要自动执行拟合的评论,请注意,用 lm 拟合的三次多项式更稳定地避免了这类问题,并且残差平方和更低——1.14 对 1.19——但代价是更多的参数——10 对 8。

# lm poly fit: cubic polynomial surface in (x1, x2) for comparison.
fm.poly <- lm(y ~ poly(x1, x2, degree = 3), df)
deviance(fm.poly) # residual sum of squares
## [1] 1.141398
length(coef(fm.poly)) # no. of coefficients
## [1] 10

# nlsLM fit transforming decay parameters (fm from the log(par + 1) fit)
deviance(fm)
## [1] 1.189855
length(coef(fm))
## [1] 8

注3:这是另一个模型,用二次多项式代替x3部分,去掉F_initial_x2,因为它变得多余。它也有 8 个参数,它收敛并且比问题中的模型更适合数据(即具有较低的残差平方和)。

# Alternative 8-parameter model: the x3 factor is replaced by a quadratic
# polynomial (coefficients p1..p3, unbounded via NA in `lower`), and
# F_initial_x2 is dropped since the polynomial absorbs the overall scale.
fm3 <- nlsLM(formula   = y ~ (exp(- F_decay_x2  * x2) + 
              S_initial_x2 * exp(- S_decay_x2 * x2)) *
              (1 + initial_x1 * exp(- decay_x1      * x1)) *
              cbind(1, poly(x3, degree = 2)) %*% c(p1,p2,p3),
          data     = df,
          start    = c(starting_value[-c(1, 7:8)], p1=0, p2=0, p3=0),
          lower = c(0, 0, 0, 0, 0, 0, NA, NA),
          control  = nls.lm.control(maxiter = 200),
          trace    = TRUE)

给予:

> fm3
Nonlinear regression model
  model: y ~ (exp(-F_decay_x2 * x2) + S_initial_x2 * exp(-S_decay_x2 *     x2)) * (1 + initial_x1 * exp(-decay_x1 * x1)) * cbind(1,     poly(x3, degree = 2)) %*% c(p1, p2, p3)
   data: df
  F_decay_x2 S_initial_x2   S_decay_x2   initial_x1     decay_x1           p1 
     3.51614      2.60886      0.26304      8.26244      0.81232      0.09031 
          p2           p3 
    -0.16968      0.53324 
 residual sum-of-squares: 1.019

Number of iterations to convergence: 20 
Achieved convergence tolerance: 1.49e-08

注 4: nlmrt 包中的 nlxb 无需做任何特殊操作即可收敛。

# nlmrt::nlxb converges on the original model with no special handling.
# NOTE(review): `control = nls.lm.control(...)` is a minpack.lm control list;
# nlxb has its own control argument — confirm it is accepted or ignored here.
library(nlmrt)
nlxb( 
  formula   = y ~ (F_initial_x2   * exp(- F_decay_x2  * x2) + S_initial_x2 * exp(- S_decay_x2 * x2)) *
    (1 + initial_x1      * exp(- decay_x1      * x1)) *
    (1 + initial_x3      * exp(- decay_x3      * x3 )),
  data     = df,
  start    = starting_value,
  lower    = c(0, 0, 0, 0, 0, 0, 0, 0),
  control  = nls.lm.control(maxiter = 200),
  trace    = TRUE)

给予:

residual sumsquares =  1.1899  on  280 observations
    after  31    Jacobian and  33 function evaluations
  name            coeff          SE       tstat      pval      gradient    JSingval   
F_initial_x2      0.109175            NA         NA         NA   3.372e-11        15.1  
F_decay_x2      0.876313            NA         NA         NA   -5.94e-12       8.083  
S_initial_x2      0.152598            NA         NA         NA    6.55e-11       2.163  
S_decay_x2      0.204984            NA         NA         NA   4.206e-11      0.6181  
initial_x1       5.19928            NA         NA         NA  -1.191e-12      0.3601  
decay_x1         0.91391            NA         NA         NA   6.662e-13      0.1315  
initial_x3      0.944879            NA         NA         NA   2.736e-12     0.02247  
decay_x3         33.9921            NA         NA         NA  -1.056e-15   2.928e-15