使用真实数据在 R 中拟合的 SIRD 模型不起作用
SIRD model fitting in R using real data not working
我正在尝试使 R 中的 SIRD 模型适合真实数据。然而,观测值不在拟合曲线的任何位置。我不明白错误是什么或如何解决它,但我注意到更改“state”的值会产生错误
DLSODA- Warning..Internal T (=R1) and H (=R2) are
such that in the machine, T + H = T on the next step
(H = step size). Solver will continue anyway.
In above message, R1 = 0.1, R2 = 9.94667e-21
这是我的全部代码。非常感谢任何帮助!
library(deSolve)
state<-c(S=10000,I=1000,R=5000,D=100)
parameters <- c(a=180,b=0.4,g=0.2)
eqn<-function(t, state, parameters) {
with(as.list(c(state, parameters)),{
dS <- -a*I*S
dI <- a*I*S-g*I-b*I
dR <- g*I
dD <-b*I
list(c(dS,dI,dR,dD))
})
}
times <- seq(0.1,2.6,by=0.1)
out <- ode(y = state, times = times, func = eqn, parms = parameters)
out
plot(out)
library(FME)
data <- data.frame(
time = seq(0.1,2.6,0.1),
S=c(11417747943,11417733626,11417717809,11417702207,11417685587,11417670536,
11417652672,11417629493,11417603660,11417577979,11417550853,11417520318,
11417495673,11417466974,11417435119,11417399167,11417362265,11417326539,
11417286125,11417254482,11417226564,11417187020,11417143837,11417095924,
11417046477,11416989403),
I=c(3686,7062,4415,8040,7706,4316,8266,13947,13593,11207,13446,19114,5121,15400,
16658,15386,19766,21024,22426,10683,3958,15701,10290,23299,11340,29331),
R=c(9913,7193,11344,7467,8861,10671,9510,9138,12174,14400,13588,11314,19463,13165,
15098,20444,17019,14523,17874,20854,23820,23600,32641,24126,37821,27508),
D=c(54,57,56,88,50,48,87,84,58,70,92,99,58,132,95,111,112,166,108,102,139,
227,249,481,277,222)
)
cost <- function(p) {
out <- ode(state, times, eqn, p)
modCost(out, data, weight = "none")
}
fit <- modFit(f = cost, p = parameters)
summary(fit)
out1 <- ode(state, times, eqn, parameters)
out2 <- ode(state, times, eqn, coef(fit))
plot(out1, out2, obs=data, obspar=list(pch=16, col="red"))
您的代码有几个问题:
- 状态变量的数量级不同,需要
weight="std"
或weight = "mean"
- 状态变量的初始值相距甚远。这是最严重的错误。您可以手动将其设置为合理的值(见下文)或更好,适合它,请参阅 FME 文档如何完成此操作。
- 启动参数远非最佳。虽然希望算法从任意初始值收敛到最优值,但这种情况很少发生。因此,一些慎重的考虑或反复试验是不可避免的。
- 违反质量平衡,即所有 4 种状态的总和随时间变化。检查
rowSums(data[-1])
.
这是一种处理部分问题的方法。下一步将是修复质量平衡并将 ode 模型的 ode 初始状态作为非线性优化的参数。
library(deSolve)
library(FME)
eqn<-function(t, state, parameters) {
with(as.list(c(state, parameters)),{
dS <- -a*I*S
dI <- a*I*S - g*I - b*I
dR <- g*I
dD <- b*I
list(c(dS,dI,dR,dD))
})
}
data <- data.frame(
time = seq(0.1,2.6,0.1),
S=c(11417747943,11417733626,11417717809,11417702207,11417685587,11417670536,
11417652672,11417629493,11417603660,11417577979,11417550853,11417520318,
11417495673,11417466974,11417435119,11417399167,11417362265,11417326539,
11417286125,11417254482,11417226564,11417187020,11417143837,11417095924,
11417046477,11416989403),
I=c(3686,7062,4415,8040,7706,4316,8266,13947,13593,11207,13446,19114,5121,15400,
16658,15386,19766,21024,22426,10683,3958,15701,10290,23299,11340,29331),
R=c(9913,7193,11344,7467,8861,10671,9510,9138,12174,14400,13588,11314,19463,13165,
15098,20444,17019,14523,17874,20854,23820,23600,32641,24126,37821,27508),
D=c(54,57,56,88,50,48,87,84,58,70,92,99,58,132,95,111,112,166,108,102,139,
227,249,481,277,222)
)
state <- c(S=11417747943, I=5000, R=8000, D=50)
parameters <- c(a=1e-10, b=0.001, g=0.1)
times<-seq(0.1,2.6,by=0.01)
cost <- function(p) {
out <- ode(state, times, eqn, p)
modCost(out, data, weight = "mean")
}
fit <- modFit(f = cost, p = parameters)
summary(fit, corr=TRUE)
out2 <- ode(state, times, eqn, coef(fit))
plot(out2, obs=data, obspar=list(pch=16, col="red"), ylim=list(c(0, 2e10), c(0, 50000), c(0, 50000), c(0, 600)))
编辑
以下方法提高了拟合度:
- 通过将总人口设置为随时间恒定来固定质量平衡
- 重新缩放数据以提高优化的稳定性
- 根据数据猜测初始值
在优化中包含初始值(理论上)会更好,但这会再次导致参数不可识别
由于给定模型和数据的内在特征。有关相关教程示例,请参阅 twocomp_final.R。
除了数据重新缩放外,还可以考虑调整控制参数
优化器和 ode
函数,或者以不同方式重新调整各个状态变量。
然而,这里最简单的方法就是将人口重新调整为“百万人”。
## fix mass balance, i.e. make sum of all states constant
## an alternative would be an additional process in the model
## for migration and / or birth and natural death
Population <- rowSums(data[c("S", "I", "R", "D")])
data$S <- Population[1] - rowSums(data[c("I", "R", "D")])
## rescale state variables to numerically more convenient numbers
## here simply: million people
scaled_data <- cbind(
time = data$time,
data[c("S", "I", "R", "D")] * 1e-6
)
## guess initial values from data (of course a little bit subjective)
state <- c(
S = scaled_data$S[1],
I = mean(scaled_data$I[1:3]),
R = mean(scaled_data$R[1:5]),
D = mean(scaled_data$D[1:3])
)
## use good initial parameters by thinking and some trial and error
parameters <- c(a = 0.0001, b = 0.01, g = 1)
cost2 <- function(p) {
out <- ode(state, times, eqn, p)
modCost(out, scaled_data, weight = "mean")
}
## fit model, enable trace with option nprint
fit <- modFit(f = cost2, p = parameters, control = list(nprint = 1))
summary(fit, corr=TRUE)
out2 <- ode(state, times, eqn, coef(fit))
plot(out2, obs = scaled_data, obspar = list(pch = 16, col = "red"))
我正在尝试使 R 中的 SIRD 模型适合真实数据。然而,观测值不在拟合曲线的任何位置。我不明白错误是什么或如何解决它,但我注意到更改“state”的值会产生错误
DLSODA- Warning..Internal T (=R1) and H (=R2) are
such that in the machine, T + H = T on the next step
(H = step size). Solver will continue anyway.
In above message, R1 = 0.1, R2 = 9.94667e-21
这是我的全部代码。非常感谢任何帮助!
library(deSolve)
state<-c(S=10000,I=1000,R=5000,D=100)
parameters <- c(a=180,b=0.4,g=0.2)
eqn<-function(t, state, parameters) {
with(as.list(c(state, parameters)),{
dS <- -a*I*S
dI <- a*I*S-g*I-b*I
dR <- g*I
dD <-b*I
list(c(dS,dI,dR,dD))
})
}
times <- seq(0.1,2.6,by=0.1)
out <- ode(y = state, times = times, func = eqn, parms = parameters)
out
plot(out)
library(FME)
data <- data.frame(
time = seq(0.1,2.6,0.1),
S=c(11417747943,11417733626,11417717809,11417702207,11417685587,11417670536,
11417652672,11417629493,11417603660,11417577979,11417550853,11417520318,
11417495673,11417466974,11417435119,11417399167,11417362265,11417326539,
11417286125,11417254482,11417226564,11417187020,11417143837,11417095924,
11417046477,11416989403),
I=c(3686,7062,4415,8040,7706,4316,8266,13947,13593,11207,13446,19114,5121,15400,
16658,15386,19766,21024,22426,10683,3958,15701,10290,23299,11340,29331),
R=c(9913,7193,11344,7467,8861,10671,9510,9138,12174,14400,13588,11314,19463,13165,
15098,20444,17019,14523,17874,20854,23820,23600,32641,24126,37821,27508),
D=c(54,57,56,88,50,48,87,84,58,70,92,99,58,132,95,111,112,166,108,102,139,
227,249,481,277,222)
)
cost <- function(p) {
out <- ode(state, times, eqn, p)
modCost(out, data, weight = "none")
}
fit <- modFit(f = cost, p = parameters)
summary(fit)
out1 <- ode(state, times, eqn, parameters)
out2 <- ode(state, times, eqn, coef(fit))
plot(out1, out2, obs=data, obspar=list(pch=16, col="red"))
您的代码有几个问题:
- 状态变量的数量级不同,需要
weight="std"
或weight = "mean"
- 状态变量的初始值相距甚远。这是最严重的错误。您可以手动将其设置为合理的值(见下文)或更好,适合它,请参阅 FME 文档如何完成此操作。
- 启动参数远非最佳。虽然希望算法从任意初始值收敛到最优值,但这种情况很少发生。因此,一些慎重的考虑或反复试验是不可避免的。
- 违反质量平衡,即所有 4 种状态的总和随时间变化。检查
rowSums(data[-1])
.
这是一种处理部分问题的方法。下一步将是修复质量平衡并将 ode 模型的 ode 初始状态作为非线性优化的参数。
library(deSolve)
library(FME)
eqn<-function(t, state, parameters) {
with(as.list(c(state, parameters)),{
dS <- -a*I*S
dI <- a*I*S - g*I - b*I
dR <- g*I
dD <- b*I
list(c(dS,dI,dR,dD))
})
}
data <- data.frame(
time = seq(0.1,2.6,0.1),
S=c(11417747943,11417733626,11417717809,11417702207,11417685587,11417670536,
11417652672,11417629493,11417603660,11417577979,11417550853,11417520318,
11417495673,11417466974,11417435119,11417399167,11417362265,11417326539,
11417286125,11417254482,11417226564,11417187020,11417143837,11417095924,
11417046477,11416989403),
I=c(3686,7062,4415,8040,7706,4316,8266,13947,13593,11207,13446,19114,5121,15400,
16658,15386,19766,21024,22426,10683,3958,15701,10290,23299,11340,29331),
R=c(9913,7193,11344,7467,8861,10671,9510,9138,12174,14400,13588,11314,19463,13165,
15098,20444,17019,14523,17874,20854,23820,23600,32641,24126,37821,27508),
D=c(54,57,56,88,50,48,87,84,58,70,92,99,58,132,95,111,112,166,108,102,139,
227,249,481,277,222)
)
state <- c(S=11417747943, I=5000, R=8000, D=50)
parameters <- c(a=1e-10, b=0.001, g=0.1)
times<-seq(0.1,2.6,by=0.01)
cost <- function(p) {
out <- ode(state, times, eqn, p)
modCost(out, data, weight = "mean")
}
fit <- modFit(f = cost, p = parameters)
summary(fit, corr=TRUE)
out2 <- ode(state, times, eqn, coef(fit))
plot(out2, obs=data, obspar=list(pch=16, col="red"), ylim=list(c(0, 2e10), c(0, 50000), c(0, 50000), c(0, 600)))
编辑
以下方法提高了拟合度:
- 通过将总人口设置为随时间恒定来固定质量平衡
- 重新缩放数据以提高优化的稳定性
- 根据数据猜测初始值
在优化中包含初始值(理论上)会更好,但这会再次导致参数不可识别 由于给定模型和数据的内在特征。有关相关教程示例,请参阅 twocomp_final.R。
除了数据重新缩放外,还可以考虑调整控制参数
优化器和 ode
函数,或者以不同方式重新调整各个状态变量。
然而,这里最简单的方法就是将人口重新调整为“百万人”。
## fix mass balance, i.e. make sum of all states constant
## an alternative would be an additional process in the model
## for migration and / or birth and natural death
Population <- rowSums(data[c("S", "I", "R", "D")])
data$S <- Population[1] - rowSums(data[c("I", "R", "D")])
## rescale state variables to numerically more convenient numbers
## here simply: million people
scaled_data <- cbind(
time = data$time,
data[c("S", "I", "R", "D")] * 1e-6
)
## guess initial values from data (of course a little bit subjective)
state <- c(
S = scaled_data$S[1],
I = mean(scaled_data$I[1:3]),
R = mean(scaled_data$R[1:5]),
D = mean(scaled_data$D[1:3])
)
## use good initial parameters by thinking and some trial and error
parameters <- c(a = 0.0001, b = 0.01, g = 1)
cost2 <- function(p) {
out <- ode(state, times, eqn, p)
modCost(out, scaled_data, weight = "mean")
}
## fit model, enable trace with option nprint
fit <- modFit(f = cost2, p = parameters, control = list(nprint = 1))
summary(fit, corr=TRUE)
out2 <- ode(state, times, eqn, coef(fit))
plot(out2, obs = scaled_data, obspar = list(pch = 16, col = "red"))