使用成本函数约束 nlsLM()
Constrain nlsLM() with a cost function
我有一个数据框df
df<- structure(list(ID = structure(c(1L, 3L, 5L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
31L, 32L, 33L, 36L, 37L, 40L, 41L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 52L, 53L, 54L, 56L, 57L, 58L, 60L, 62L, 63L, 66L, 67L, 68L,
69L, 70L, 71L, 72L, 75L, 77L, 80L, 81L, 82L, 88L, 93L, 95L, 97L,
99L, 101L, 102L, 107L, 108L, 114L), .Label = c("AU-Tum", "AU-Wac",
"BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax", "BR-Ma2", "BR-Sa1", "BR-Sa3",
"BW-Ma1", "CA-Ca1", "CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1",
"CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas",
"CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3",
"CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CN-Cha",
"CN-Ku1", "CZ-Bk1", "De-Bay", "DE-Hai", "DE-Har", "DE-Meh", "DE-Tha",
"DE-Wet", "DK-Sor", "ES-Es1", "FI-Hyy", "FI-Sod", "FR-Fon", "FR-Hes",
"FR-Lbr", "FR-Pue", "GF-Guy", "ID-Pag", "IL-Yat", "IT-Col", "IT-Cpz",
"IT-Lav", "IT-Lma", "IT-Noe", "IT-Non", "IT-Pt1", "IT-Ro1", "IT-Ro2",
"IT-Sro", "JP-Tak", "JP-Tef", "JP-Tom", "NL-Loo", "PT-Esp", "RU-Fyo",
"RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1",
"UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3",
"US-Dk2", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2", "US-Ho1",
"US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1", "US-Oho",
"US-So2", "US-So3", "US-Sp1", "US-Sp2", "US-Sp3", "US-Syv", "US-Umb",
"US-Wbw", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8",
"VU-Coc", "CA-Cbo", "CN-Lao", "ID-Buk", "IS-Gun", "JP-Fuj", "MY-Pso",
"RU-Ab", "RU-Be", "RU-Mix", "TH-Mae"), class = "factor"), Y = c(436.783385497984,
55.1825021383702, 526.4133417369, 391.49284084118, -519.814235572849,
11.5525291214872, 162.441016515717, 39.0395567645998, -70.4910326673707,
17.1155716306239, -106.326129257097, -94.9308303585276, -66.4285516217351,
-144.929052323413, -220.613145695315, 157.129576861289, 44.1257786633602,
46.8326830295943, -146.719591499443, 30.8043649939355, -4.10548956954153,
-108.258462657337, 90.3369144331664, 126.866108251153, 42.9489971246803,
-45.4886732113082, 483.932040393885, 590.754048774834, 82.1480000555981,
76.8863707484328, 404.007940533033, 202.629066249886, -46.9675149230141,
557.939170770813, 300.979565786038, 224.256197650044, 148.719307398695,
201.195892312115, 466.727302447427, 552.762670615377, 595.145436977735,
481.359543363331, 467.379381521489, 444.812610935212, 308.198167469197,
-638.973101716489, 321.395064735785, 181.896345832773, 629.214319321327,
-176.181996958815, 59.1716887350485, -186.42650026083, 515.533437888983,
595.091753601562, 367.15020653978, 934.415348643437, 255.499246957091,
368.347069109092, 141.97570288631, 39.5917358684237, 105.039591642989,
77.9087587283187, 153.700042322307, 157.436949779134, 358.242634316906
), Unc = c(-2.87896446519996, -0.30731156873436, -1.3811336535939,
-3.60168125065523, 1.35359565655672, -0.58525692609091, -0.463995294634932,
-0.112770209421705, -0.178508318809592, -0.44506337354913, 0.285085608169751,
0.241425707960461, -0.616179720920167, -0.00579570274186878,
0.385699289486463, -0.43071884834486, -1.32799753416588, -0.138248737701239,
0.026437443324628, -0.0981101016865843, -0.125326368431498, 0.289668409902704,
-0.224679559714174, -0.376257445433255, -0.0904535633017475,
-1.27942478849042, -2.78944896222686, -1.57015451106923, -3.02435991211342,
-0.188885650489005, -2.77697810019308, -0.683634153351544, 0.148164853468482,
-1.520102142822, -0.855422614115418, -0.580609573477037, -2.12306082165876,
-1.2334420909422, -2.00323122411995, -1.45967340674881, -1.60448511158608,
-2.52530298868671, -1.28908559855364, -1.16270411420386, -1.5186009244046,
0.24330408272554, -1.72852090322909, -0.497423296440042, -2.79905035399537,
0.453520174531953, -0.38557736709315, 0.513504024431323, -1.58608831551316,
-1.56046815861851, -3.32259575879769, -7.99135003959363, -0.913109035398266,
-3.48447862397436, -0.518022500487711, -0.352263975401941, -0.331662926968978,
-0.236234610041281, -2.31039763656225, -0.987148209221828, -3.37441047823435
), X = c(98.5, 77, 63.2222222222222, 52.5, 3.5, 15.5, 71, 161.833333333333,
153.5, 73, 39, 40, 23, 14, 5.5, 78, 129.5, 73.5, 4, 100, 10,
3, 30, 65.5, 198, 45.5, 20, 111.5, 44, 68.5, 102.5, 39.1111111111111,
83.8, 136, 31.5, 56.5, 101, 39.25, 108.5, 52.1666666666667, 54.5,
9.5, 13, 52.1428571428571, 66.5, 1, 44.25, 106, 19, 202.571428571429,
36.6, 2, 21.2, 69, 67.5, 21, 135, 46.5, 17.5, 96, 80.6666666666667,
10.6666666666667, 86.5, 66.2, 2.5)), .Names = c("ID", "Y", "Unc",
"X"), row.names = c(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L), class = "data.frame", na.action = structure(c(4L,
52L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L), .Names = c("4",
"52", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77"
), class = "omit"))
我目前正在执行一个带有留一法 ID 的非线性回归分析,如下所示:
library (minpack.lm)
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500)), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
基本上,对于每次迭代,我删除一个 ID 并使用其他 ID 执行非线性回归模型,然后用于预测遗漏 ID 的响应。
但是,用于创建非线性模型的每个观察值都具有不同的不确定性值(请参阅 df
中的 Unc
列)。因此,我想根据我观察的不确定性在 nls 中包含一个成本函数。我做了一些研究,下面的成本函数应该可以完成工作:cost= sum(((obs-pred)/ Unc)^2)
这基本上是不确定性加权的平方误差之和 (SEE)。
我很难找到将此成本函数包含在 nlsLM()
对象中的方法。我已经看到 weights
参数可以包含在 nlsLM()
对象中,但是这个参数的可能性看起来非常有限。
到目前为止,这是我通过在 nlsLM() 函数中包含权重参数所做的尝试,但到目前为止它不起作用:
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500), weights = wfct(sum(((fitted-Y)/Unc)^2))), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
您可以使用1/Unc^2
作为nlsLM
中的权重,得到问题中称为cost
和下面obj.w
的objective函数。
为了说明,下面使用 optim
和 nlsLM
的两个未加权示例给出相同的结果,而两个加权示例(也使用 optim
和 nlsLM
)给出同样的结果。由于这里的 objective 是为了表明权重相同,我们在每种情况下使用的起始值都非常接近最终值,因此算法的差异不会发挥作用。
rhs <- function(p, df) with(df, p[[3]] + p[[1]]*(1-exp(p[[2]]*X)))
# unweighted
st.u <- list(A = 800, k = -0.2, offset = -700)
obj.u <- function(p, df) with(df, sum((Y - rhs(p, df))^2))
fit.optim.u <- optim(st.u, obj.u, df = df)
fit.optim.u$par; fit.optim.u$value # 1
fit.nlsLM.u <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, start = st.u)
coef(fit.nlsLM.u); deviance(fit.nlsLM.u) # 2 - same as 1
# weighted
st.w <- list(A = 2704, k = -1.7, offset = -2845)
obj.w <- function(p, df) with(df, sum((Y - rhs(p, df))^2/Unc^2))
fit.optim.w <- optim(st.w, obj.w, df = df)
fit.optim.w$par; fit.optim.w$value # 3
fit.nlsLM.w <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, weights = 1/Unc^2,
start = st.w)
coef(fit.nlsLM.w); deviance(fit.nlsLM.w) # 4 = same as 3
似乎你想要的 objective function/weights 仍然不清楚,所以你可能想使用 optim
玩上面的例子,直到你找到 objective 函数(因为 objective 是显式的),然后在 nlsLM 中实现它,检查它们是否给出相同的答案。请注意 optim
和 nlsLM
可以找到不同的答案,即使您使用相同的 objectives/weights 由于不同的算法和停止标准所以如果您 运行 两者都找到然后尝试以找到的最佳系数开始它们以消除差异。
所有这一切的要点是,由于使用 optim
我们明确指定了 objective 而不是使用 nlsLM
它提供了一种双重检查 nlsLM
是什么的方法做。
更新: 重新设计,重点比较 optim 和 nlsLM。
我有一个数据框df
df<- structure(list(ID = structure(c(1L, 3L, 5L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
31L, 32L, 33L, 36L, 37L, 40L, 41L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 52L, 53L, 54L, 56L, 57L, 58L, 60L, 62L, 63L, 66L, 67L, 68L,
69L, 70L, 71L, 72L, 75L, 77L, 80L, 81L, 82L, 88L, 93L, 95L, 97L,
99L, 101L, 102L, 107L, 108L, 114L), .Label = c("AU-Tum", "AU-Wac",
"BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax", "BR-Ma2", "BR-Sa1", "BR-Sa3",
"BW-Ma1", "CA-Ca1", "CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1",
"CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas",
"CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3",
"CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CN-Cha",
"CN-Ku1", "CZ-Bk1", "De-Bay", "DE-Hai", "DE-Har", "DE-Meh", "DE-Tha",
"DE-Wet", "DK-Sor", "ES-Es1", "FI-Hyy", "FI-Sod", "FR-Fon", "FR-Hes",
"FR-Lbr", "FR-Pue", "GF-Guy", "ID-Pag", "IL-Yat", "IT-Col", "IT-Cpz",
"IT-Lav", "IT-Lma", "IT-Noe", "IT-Non", "IT-Pt1", "IT-Ro1", "IT-Ro2",
"IT-Sro", "JP-Tak", "JP-Tef", "JP-Tom", "NL-Loo", "PT-Esp", "RU-Fyo",
"RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1",
"UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3",
"US-Dk2", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2", "US-Ho1",
"US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1", "US-Oho",
"US-So2", "US-So3", "US-Sp1", "US-Sp2", "US-Sp3", "US-Syv", "US-Umb",
"US-Wbw", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8",
"VU-Coc", "CA-Cbo", "CN-Lao", "ID-Buk", "IS-Gun", "JP-Fuj", "MY-Pso",
"RU-Ab", "RU-Be", "RU-Mix", "TH-Mae"), class = "factor"), Y = c(436.783385497984,
55.1825021383702, 526.4133417369, 391.49284084118, -519.814235572849,
11.5525291214872, 162.441016515717, 39.0395567645998, -70.4910326673707,
17.1155716306239, -106.326129257097, -94.9308303585276, -66.4285516217351,
-144.929052323413, -220.613145695315, 157.129576861289, 44.1257786633602,
46.8326830295943, -146.719591499443, 30.8043649939355, -4.10548956954153,
-108.258462657337, 90.3369144331664, 126.866108251153, 42.9489971246803,
-45.4886732113082, 483.932040393885, 590.754048774834, 82.1480000555981,
76.8863707484328, 404.007940533033, 202.629066249886, -46.9675149230141,
557.939170770813, 300.979565786038, 224.256197650044, 148.719307398695,
201.195892312115, 466.727302447427, 552.762670615377, 595.145436977735,
481.359543363331, 467.379381521489, 444.812610935212, 308.198167469197,
-638.973101716489, 321.395064735785, 181.896345832773, 629.214319321327,
-176.181996958815, 59.1716887350485, -186.42650026083, 515.533437888983,
595.091753601562, 367.15020653978, 934.415348643437, 255.499246957091,
368.347069109092, 141.97570288631, 39.5917358684237, 105.039591642989,
77.9087587283187, 153.700042322307, 157.436949779134, 358.242634316906
), Unc = c(-2.87896446519996, -0.30731156873436, -1.3811336535939,
-3.60168125065523, 1.35359565655672, -0.58525692609091, -0.463995294634932,
-0.112770209421705, -0.178508318809592, -0.44506337354913, 0.285085608169751,
0.241425707960461, -0.616179720920167, -0.00579570274186878,
0.385699289486463, -0.43071884834486, -1.32799753416588, -0.138248737701239,
0.026437443324628, -0.0981101016865843, -0.125326368431498, 0.289668409902704,
-0.224679559714174, -0.376257445433255, -0.0904535633017475,
-1.27942478849042, -2.78944896222686, -1.57015451106923, -3.02435991211342,
-0.188885650489005, -2.77697810019308, -0.683634153351544, 0.148164853468482,
-1.520102142822, -0.855422614115418, -0.580609573477037, -2.12306082165876,
-1.2334420909422, -2.00323122411995, -1.45967340674881, -1.60448511158608,
-2.52530298868671, -1.28908559855364, -1.16270411420386, -1.5186009244046,
0.24330408272554, -1.72852090322909, -0.497423296440042, -2.79905035399537,
0.453520174531953, -0.38557736709315, 0.513504024431323, -1.58608831551316,
-1.56046815861851, -3.32259575879769, -7.99135003959363, -0.913109035398266,
-3.48447862397436, -0.518022500487711, -0.352263975401941, -0.331662926968978,
-0.236234610041281, -2.31039763656225, -0.987148209221828, -3.37441047823435
), X = c(98.5, 77, 63.2222222222222, 52.5, 3.5, 15.5, 71, 161.833333333333,
153.5, 73, 39, 40, 23, 14, 5.5, 78, 129.5, 73.5, 4, 100, 10,
3, 30, 65.5, 198, 45.5, 20, 111.5, 44, 68.5, 102.5, 39.1111111111111,
83.8, 136, 31.5, 56.5, 101, 39.25, 108.5, 52.1666666666667, 54.5,
9.5, 13, 52.1428571428571, 66.5, 1, 44.25, 106, 19, 202.571428571429,
36.6, 2, 21.2, 69, 67.5, 21, 135, 46.5, 17.5, 96, 80.6666666666667,
10.6666666666667, 86.5, 66.2, 2.5)), .Names = c("ID", "Y", "Unc",
"X"), row.names = c(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L), class = "data.frame", na.action = structure(c(4L,
52L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L), .Names = c("4",
"52", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77"
), class = "omit"))
我目前正在执行一个带有留一法 ID 的非线性回归分析,如下所示:
library (minpack.lm)
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500)), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
基本上,对于每次迭代,我删除一个 ID 并使用其他 ID 执行非线性回归模型,然后用于预测遗漏 ID 的响应。
但是,用于创建非线性模型的每个观察值都具有不同的不确定性值(请参阅 df
中的 Unc
列)。因此,我想根据我观察的不确定性在 nls 中包含一个成本函数。我做了一些研究,下面的成本函数应该可以完成工作:cost= sum(((obs-pred)/ Unc)^2)
这基本上是不确定性加权的平方误差之和 (SEE)。
我很难找到将此成本函数包含在 nlsLM()
对象中的方法。我已经看到 weights
参数可以包含在 nlsLM()
对象中,但是这个参数的可能性看起来非常有限。
到目前为止,这是我通过在 nlsLM() 函数中包含权重参数所做的尝试,但到目前为止它不起作用:
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500), weights = wfct(sum(((fitted-Y)/Unc)^2))), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
您可以使用1/Unc^2
作为nlsLM
中的权重,得到问题中称为cost
和下面obj.w
的objective函数。
为了说明,下面使用 optim
和 nlsLM
的两个未加权示例给出相同的结果,而两个加权示例(也使用 optim
和 nlsLM
)给出同样的结果。由于这里的 objective 是为了表明权重相同,我们在每种情况下使用的起始值都非常接近最终值,因此算法的差异不会发挥作用。
rhs <- function(p, df) with(df, p[[3]] + p[[1]]*(1-exp(p[[2]]*X)))
# unweighted
st.u <- list(A = 800, k = -0.2, offset = -700)
obj.u <- function(p, df) with(df, sum((Y - rhs(p, df))^2))
fit.optim.u <- optim(st.u, obj.u, df = df)
fit.optim.u$par; fit.optim.u$value # 1
fit.nlsLM.u <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, start = st.u)
coef(fit.nlsLM.u); deviance(fit.nlsLM.u) # 2 - same as 1
# weighted
st.w <- list(A = 2704, k = -1.7, offset = -2845)
obj.w <- function(p, df) with(df, sum((Y - rhs(p, df))^2/Unc^2))
fit.optim.w <- optim(st.w, obj.w, df = df)
fit.optim.w$par; fit.optim.w$value # 3
fit.nlsLM.w <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, weights = 1/Unc^2,
start = st.w)
coef(fit.nlsLM.w); deviance(fit.nlsLM.w) # 4 = same as 3
似乎你想要的 objective function/weights 仍然不清楚,所以你可能想使用 optim
玩上面的例子,直到你找到 objective 函数(因为 objective 是显式的),然后在 nlsLM 中实现它,检查它们是否给出相同的答案。请注意 optim
和 nlsLM
可以找到不同的答案,即使您使用相同的 objectives/weights 由于不同的算法和停止标准所以如果您 运行 两者都找到然后尝试以找到的最佳系数开始它们以消除差异。
所有这一切的要点是,由于使用 optim
我们明确指定了 objective 而不是使用 nlsLM
它提供了一种双重检查 nlsLM
是什么的方法做。
更新: 重新设计,重点比较 optim 和 nlsLM。