lm 回归和大列表中的很多变量
A lot of variables in lm regression and large list
我的任务是对虚拟变量进行一些研究。这是一个 R 代码:
parameters_estimation2 <- function(n)
{
age <- sample(20:40, n, replace=TRUE)
male <- sample(0:1, n, replace=TRUE)
education <- sample(0:6, n, replace=TRUE)
experience <- floor(rexp(n, 0.2))
for(i in 1:n)
{if(experience[i]>15) {
experience[i] <- floor(rexp(1, 0.2))
if(experience[i]>15) { i <- i-1 }
}}
sqexperience <- experience*experience
e <- rnorm(n, 0, 4)
B0 <- -200; B1 <- 15; B2 <- 100; B3 <-10; B4 <- 5; B5 <-20;
wage <- B0 + B1*age + B2*male + B3*education+ B4*experience+ B5*sqexperience+e
#Dummy making
expe1 <- c(rep(0,n)); expe2 <- c(rep(0,n)); expe3 <- c(rep(0,n)); expe4 <- c(rep(0,n));
expe5 <- c(rep(0,n)); expe6 <- c(rep(0,n)); expe7 <- c(rep(0,n)); expe8 <- c(rep(0,n));
expe9 <- c(rep(0,n)); expe10 <- c(rep(0,n)); expe11 <- c(rep(0,n)); expe12 <- c(rep(0,n));
expe13 <- c(rep(0,n)); expe14 <- c(rep(0,n)); expe15 <- c(rep(0,n));
for(i in 1:n)
{
if(experience[i]==1) { expe1[i] <-1
} else if(experience[i]==2) { expe2[i] <-1
} else if(experience[i]==3) { expe3[i] <-1
} else if(experience[i]==4) { expe4[i] <-1
} else if(experience[i]==5) { expe5[i] <-1
} else if(experience[i]==6) { expe6[i] <-1
} else if(experience[i]==7) { expe7[i] <-1
} else if(experience[i]==8) { expe8[i] <-1
} else if(experience[i]==9) { expe9[i] <-1
} else if(experience[i]==10) { expe10[i] <-1
} else if(experience[i]==11) { expe11[i] <-1
} else if(experience[i]==12) { expe12[i] <-1
} else if(experience[i]==13) { expe13[i] <-1
} else if(experience[i]==14) { expe14[i] <-1
} else if(experience[i]==15) { expe15[i] <-1
}}
regression<-lm(wage~age+male+education+expe1+expe2+expe3+expe4+expe5+expe6+expe7+expe8+expe9+expe10+expe11+expe12+expe13+expe14+expe15)
return(summary(regression)$coefficients[,"Estimate"])
}
times <- 1000
size <- rep(200, times)
koeficientai1 <-mapply(parameters_estimation2, size)
blah <- as.data.table(koeficientai1)
beta0sample200d <- mean(koeficientai1[,"(Intercept)"])
问题是在最后一行我得到:
Error in koeficientai1[, "(Intercept)"] : incorrect number of dimensions
我认为问题在于 koeficientai1
是一个大列表。但是后来我尝试了另一个只有 5 个变量的 lm 回归,代码运行正常,我得到了简单的数据框。
尝试将最后一行替换为
beta0sample200d <- mean(sapply(koeficientai1, function(x) x["(Intercept)"]))
koeficientai1
是一个列表,但您尝试将其作为 data.frame 访问,因此出现错误消息。
sapply
从 koeficientai1
中的每个列表元素中提取名为 (Intercept)
的元素(在您的情况下,每个列表元素都是一个命名向量)和 returns 一个向量包含结果。
我的任务是对虚拟变量进行一些研究。这是一个 R 代码:
parameters_estimation2 <- function(n)
{
age <- sample(20:40, n, replace=TRUE)
male <- sample(0:1, n, replace=TRUE)
education <- sample(0:6, n, replace=TRUE)
experience <- floor(rexp(n, 0.2))
for(i in 1:n)
{if(experience[i]>15) {
experience[i] <- floor(rexp(1, 0.2))
if(experience[i]>15) { i <- i-1 }
}}
sqexperience <- experience*experience
e <- rnorm(n, 0, 4)
B0 <- -200; B1 <- 15; B2 <- 100; B3 <-10; B4 <- 5; B5 <-20;
wage <- B0 + B1*age + B2*male + B3*education+ B4*experience+ B5*sqexperience+e
#Dummy making
expe1 <- c(rep(0,n)); expe2 <- c(rep(0,n)); expe3 <- c(rep(0,n)); expe4 <- c(rep(0,n));
expe5 <- c(rep(0,n)); expe6 <- c(rep(0,n)); expe7 <- c(rep(0,n)); expe8 <- c(rep(0,n));
expe9 <- c(rep(0,n)); expe10 <- c(rep(0,n)); expe11 <- c(rep(0,n)); expe12 <- c(rep(0,n));
expe13 <- c(rep(0,n)); expe14 <- c(rep(0,n)); expe15 <- c(rep(0,n));
for(i in 1:n)
{
if(experience[i]==1) { expe1[i] <-1
} else if(experience[i]==2) { expe2[i] <-1
} else if(experience[i]==3) { expe3[i] <-1
} else if(experience[i]==4) { expe4[i] <-1
} else if(experience[i]==5) { expe5[i] <-1
} else if(experience[i]==6) { expe6[i] <-1
} else if(experience[i]==7) { expe7[i] <-1
} else if(experience[i]==8) { expe8[i] <-1
} else if(experience[i]==9) { expe9[i] <-1
} else if(experience[i]==10) { expe10[i] <-1
} else if(experience[i]==11) { expe11[i] <-1
} else if(experience[i]==12) { expe12[i] <-1
} else if(experience[i]==13) { expe13[i] <-1
} else if(experience[i]==14) { expe14[i] <-1
} else if(experience[i]==15) { expe15[i] <-1
}}
regression<-lm(wage~age+male+education+expe1+expe2+expe3+expe4+expe5+expe6+expe7+expe8+expe9+expe10+expe11+expe12+expe13+expe14+expe15)
return(summary(regression)$coefficients[,"Estimate"])
}
times <- 1000
size <- rep(200, times)
koeficientai1 <-mapply(parameters_estimation2, size)
blah <- as.data.table(koeficientai1)
beta0sample200d <- mean(koeficientai1[,"(Intercept)"])
问题是在最后一行我得到:
Error in koeficientai1[, "(Intercept)"] : incorrect number of dimensions
我认为问题在于 koeficientai1
是一个大列表。但是后来我尝试了另一个只有 5 个变量的 lm 回归,代码运行正常,我得到了简单的数据框。
尝试将最后一行替换为
beta0sample200d <- mean(sapply(koeficientai1, function(x) x["(Intercept)"]))
koeficientai1
是一个列表,但您尝试将其作为 data.frame 访问,因此出现错误消息。
sapply
从 koeficientai1
中的每个列表元素中提取名为 (Intercept)
的元素(在您的情况下,每个列表元素都是一个命名向量)和 returns 一个向量包含结果。