如何运行 R 中多元回归的蒙特卡洛模拟?
How to run a Monte Carlo simulation for multiple regression in R?
我想对预测 mpg 的多元回归模型运行蒙特卡洛(Monte Carlo)模拟,然后统计每辆车比另一辆车性能更好(mpg 更低)的次数。这是我目前写出的代码:
# Load the packages used below.
library(pacman)
pacman::p_load(data.table, fixest, stargazer, dplyr, magrittr)

# Fit the reference model on mtcars; its coefficients serve as the
# parameters of the data-generating process in the simulation.
df <- mtcars
fit <- lm(mpg ~ cyl + hp, data = df)
fit$coefficients[1]
beta_0 <- fit$coefficients[1] # intercept
beta_1 <- fit$coefficients[2] # slope on cyl
beta_2 <- fit$coefficients[3] # slope on hp

set.seed(1)
n <- 1000 # observations per simulated sample
M <- 500 # number of Monte Carlo replications

# Pre-allocated storage, one slot per replication.
slope_DT <- numeric(M)
slope_DT_2 <- numeric(M)
intercept_DT <- numeric(M)

for (i in seq_len(M)) {
  # The error is drawn before the regressor; with the seed set above, the
  # draw order determines the simulated values.
  U_i <- rnorm(n, mean = 0, sd = 2)
  X_i <- rnorm(n, mean = 5, sd = 5)
  # Both slopes multiply the same regressor, so the simulated model has
  # only one independent variable.
  Y_i <- beta_0 + beta_1 * X_i + beta_2 * X_i + U_i

  data_i <- data.table(Y = Y_i, X = X_i)
  ols_i <- fixest::feols(Y ~ X, data = data_i)

  # The fit holds two coefficients (intercept and X); position 3 does not
  # exist, so slope_DT_2 receives NA in every replication.
  intercept_DT[i] <- ols_i$coefficients[1]
  slope_DT[i] <- ols_i$coefficients[2]
  slope_DT_2[i] <- ols_i$coefficients[3]
}

# Collect the per-replication estimates into one table.
estimates_DT <- data.table(
  beta_2 = slope_DT_2,
  beta_1 = slope_DT,
  beta_0 = intercept_DT
)
这段代码没有为 hp 生成任何系数。我想知道如何把该系数加入模型,然后预测结果,并统计一辆车的预测 mpg 低于另一辆车的次数。例如,Mazda RX4 的预测 mpg 有多少次低于 Datsun 710。
关于如何实现这一点,有什么想法吗?
谢谢
正如我在评论中指出的,您应该使用两个自变量。此外,我推荐使用 lapply 函数,它能让代码更短,因为不再需要初始化/存储(initialization/Storage)部分。
# Monte Carlo with two regressors: each replication simulates a sample from
# Y = beta_0 + beta_1 * X1 + beta_2 * X2 + U and re-estimates the three
# coefficients. beta_0, beta_1, beta_2, n and M must already be defined.
estimates_DT <- lapply(1:M, function(rep_id) {
  # Draw order (error, X1, X2) determines the simulated values for a
  # given seed.
  noise <- rnorm(n, mean = 0, sd = 2)
  x1 <- rnorm(n, mean = 5, sd = 5) # first independent variable
  x2 <- rnorm(n, mean = 5, sd = 5) # second independent variable
  y <- beta_0 + beta_1 * x1 + beta_2 * x2 + noise

  sim_data <- data.table(Y = y, X1 = x1, X2 = x2)
  fixest::feols(Y ~ X1 + X2, data = sim_data)$coefficients
})

# Stack the coefficient vectors row-wise (one row per replication), then
# convert to a data.table with descriptive column names.
estimates_DT <- data.table(do.call("rbind", estimates_DT))
estimates_DT <- setNames(estimates_DT, c("beta_0", "beta_1", "beta_2"))
要比较两辆车的预测,定义以下函数,将您要比较的两个车名作为参数:
#' Compare the simulated mpg predictions of two cars
#'
#' For every row of coefficient estimates in the global `estimates_DT`
#' (columns `beta_0`, `beta_1`, `beta_2`), predicts mpg for both cars from
#' their `cyl` and `hp` values in `mtcars` and counts how often one
#' prediction is strictly lower than the other.
#'
#' @param carname1 Row name of the first car in `mtcars`.
#' @param carname2 Row name of the second car in `mtcars`.
#' @return A list with `car1LowerCar2` (number of draws in which car 1's
#'   prediction is strictly lower) and `car2LowerCar1` (number of draws in
#'   which car 2's prediction is strictly lower). Ties count toward neither.
compareCarEstimations <- function(carname1 = "Mazda RX4",
                                  carname2 = "Datsun 710") {
  # An unknown name would select zero rows and silently yield counts of 0,
  # so reject it explicitly.
  unknown <- setdiff(c(carname1, carname2), rownames(mtcars))
  if (length(unknown) > 0) {
    stop(
      "Unknown car name(s): ", paste(unknown, collapse = ", "),
      ". Valid names are listed in rownames(mtcars).",
      call. = FALSE
    )
  }

  # Predicted mpg for one car under every simulated coefficient draw.
  predict_mpg <- function(carname) {
    car <- mtcars[rownames(mtcars) == carname, c("cyl", "hp")]
    estimates_DT[["beta_0"]] +
      car$cyl * estimates_DT[["beta_1"]] +
      car$hp * estimates_DT[["beta_2"]]
  }

  predsCar1 <- predict_mpg(carname1)
  predsCar2 <- predict_mpg(carname2)

  # Strict comparisons on both sides: cars with identical cyl and hp have
  # identical predictions, and such ties must not be credited to car 2.
  list(
    car1LowerCar2 = sum(predsCar1 < predsCar2),
    car2LowerCar1 = sum(predsCar2 < predsCar1)
  )
}
请确保作为参数传入的车名是有效名称,即出现在 rownames(mtcars) 中的名称。
我想对预测 mpg 的多元回归模型运行蒙特卡洛(Monte Carlo)模拟,然后统计每辆车比另一辆车性能更好(mpg 更低)的次数。这是我目前写出的代码:
# Load the packages used below.
library(pacman)
pacman::p_load(data.table, fixest, stargazer, dplyr, magrittr)

# Fit the reference model on mtcars; its coefficients serve as the
# parameters of the data-generating process in the simulation.
df <- mtcars
fit <- lm(mpg ~ cyl + hp, data = df)
fit$coefficients[1]
beta_0 <- fit$coefficients[1] # intercept
beta_1 <- fit$coefficients[2] # slope on cyl
beta_2 <- fit$coefficients[3] # slope on hp

set.seed(1)
n <- 1000 # observations per simulated sample
M <- 500 # number of Monte Carlo replications

# Pre-allocated storage, one slot per replication.
slope_DT <- numeric(M)
slope_DT_2 <- numeric(M)
intercept_DT <- numeric(M)

for (i in seq_len(M)) {
  # The error is drawn before the regressor; with the seed set above, the
  # draw order determines the simulated values.
  U_i <- rnorm(n, mean = 0, sd = 2)
  X_i <- rnorm(n, mean = 5, sd = 5)
  # Both slopes multiply the same regressor, so the simulated model has
  # only one independent variable.
  Y_i <- beta_0 + beta_1 * X_i + beta_2 * X_i + U_i

  data_i <- data.table(Y = Y_i, X = X_i)
  ols_i <- fixest::feols(Y ~ X, data = data_i)

  # The fit holds two coefficients (intercept and X); position 3 does not
  # exist, so slope_DT_2 receives NA in every replication.
  intercept_DT[i] <- ols_i$coefficients[1]
  slope_DT[i] <- ols_i$coefficients[2]
  slope_DT_2[i] <- ols_i$coefficients[3]
}

# Collect the per-replication estimates into one table.
estimates_DT <- data.table(
  beta_2 = slope_DT_2,
  beta_1 = slope_DT,
  beta_0 = intercept_DT
)
这段代码没有为 hp 生成任何系数。我想知道如何把该系数加入模型,然后预测结果,并统计一辆车的预测 mpg 低于另一辆车的次数。例如,Mazda RX4 的预测 mpg 有多少次低于 Datsun 710。
关于如何实现这一点,有什么想法吗?
谢谢
正如我在评论中指出的,您应该使用两个自变量。此外,我推荐使用 lapply 函数,它能让代码更短,因为不再需要初始化/存储(initialization/Storage)部分。
# Monte Carlo with two regressors: each replication simulates a sample from
# Y = beta_0 + beta_1 * X1 + beta_2 * X2 + U and re-estimates the three
# coefficients. beta_0, beta_1, beta_2, n and M must already be defined.
estimates_DT <- lapply(1:M, function(rep_id) {
  # Draw order (error, X1, X2) determines the simulated values for a
  # given seed.
  noise <- rnorm(n, mean = 0, sd = 2)
  x1 <- rnorm(n, mean = 5, sd = 5) # first independent variable
  x2 <- rnorm(n, mean = 5, sd = 5) # second independent variable
  y <- beta_0 + beta_1 * x1 + beta_2 * x2 + noise

  sim_data <- data.table(Y = y, X1 = x1, X2 = x2)
  fixest::feols(Y ~ X1 + X2, data = sim_data)$coefficients
})

# Stack the coefficient vectors row-wise (one row per replication), then
# convert to a data.table with descriptive column names.
estimates_DT <- data.table(do.call("rbind", estimates_DT))
estimates_DT <- setNames(estimates_DT, c("beta_0", "beta_1", "beta_2"))
要比较两辆车的预测,定义以下函数,将您要比较的两个车名作为参数:
#' Compare the simulated mpg predictions of two cars
#'
#' For every row of coefficient estimates in the global `estimates_DT`
#' (columns `beta_0`, `beta_1`, `beta_2`), predicts mpg for both cars from
#' their `cyl` and `hp` values in `mtcars` and counts how often one
#' prediction is strictly lower than the other.
#'
#' @param carname1 Row name of the first car in `mtcars`.
#' @param carname2 Row name of the second car in `mtcars`.
#' @return A list with `car1LowerCar2` (number of draws in which car 1's
#'   prediction is strictly lower) and `car2LowerCar1` (number of draws in
#'   which car 2's prediction is strictly lower). Ties count toward neither.
compareCarEstimations <- function(carname1 = "Mazda RX4",
                                  carname2 = "Datsun 710") {
  # An unknown name would select zero rows and silently yield counts of 0,
  # so reject it explicitly.
  unknown <- setdiff(c(carname1, carname2), rownames(mtcars))
  if (length(unknown) > 0) {
    stop(
      "Unknown car name(s): ", paste(unknown, collapse = ", "),
      ". Valid names are listed in rownames(mtcars).",
      call. = FALSE
    )
  }

  # Predicted mpg for one car under every simulated coefficient draw.
  predict_mpg <- function(carname) {
    car <- mtcars[rownames(mtcars) == carname, c("cyl", "hp")]
    estimates_DT[["beta_0"]] +
      car$cyl * estimates_DT[["beta_1"]] +
      car$hp * estimates_DT[["beta_2"]]
  }

  predsCar1 <- predict_mpg(carname1)
  predsCar2 <- predict_mpg(carname2)

  # Strict comparisons on both sides: cars with identical cyl and hp have
  # identical predictions, and such ties must not be credited to car 2.
  list(
    car1LowerCar2 = sum(predsCar1 < predsCar2),
    car2LowerCar1 = sum(predsCar2 < predsCar1)
  )
}
请确保作为参数传入的车名是有效名称,即出现在 rownames(mtcars) 中的名称。