模型的聚类标准误差
Clustered standard errors for a model
我想计算以下回归模型的 clustered standard errors。不知道怎么定义?我不知道如何将公式翻译并应用到 R?
我的回归模型:
model3<- lm(Dishwash_dur ~ work_minutes +
work_minutes_squared +
relevel(employment, ref = "FT")+
work_minutes*relevel(employment, ref = "FT")+
dishwas_betw +
DVAge +
DMSex+
DVHsize+
Income+
NumChild+
dishwas_betw * work_minutes+
dishwas_betw * work_minutes_squared,
data = df, weights=dishwas_timedep)
Sample data:
dput(df[1:3,])
df<-structure(list(serial = c(11011209, 11011209, 11011210), pnum = c(1,
2, 2), `diary day` = c(4, 4, 5), work_minutes = c(450, 450, 480
), work_hours = c(8, 8, 0), work_minutes_squared = c(230400,
230400, 0), cooking_betw = c(0.159090909, 0.371212121, 0.271428571
), dishwas_betw = c(NA, 0.28030303, 0.271428571), houseclean_betw = c(NA_real_,
NA_real_, NA_real_), laundry_betw = c(NA_real_, NA_real_, NA_real_
), ironing_betw = c(NA_real_, NA_real_, NA_real_), tv1_betw = c(0.227272727,
0.28030303, 0.088095238), tv2_betw = c(NA_real_, NA_real_, NA_real_
), tv3_betw = c(NA_real_, NA_real_, NA_real_), tv4_betw = c(0.242424242,
0.242424242, NA), tv5_betw = c(NA_real_, NA_real_, NA_real_),
tv6_betw = c(NA, NA, NA), tv7_betw = c(NA_real_, NA_real_,
NA_real_), employment = structure(c(1L, 2L, 2L), .Label = c("FT",
"PT"), class = "factor"), cooking_timedep = c(2.773774781,
2.773774781, 3.417989523), dishwas_timedep = c(1.99418589,
1.99418589, 2.625000906), houseclean_timedep = c(5.327815177,
5.327815177, 4.750929283), laundry_timedep = c(0.964285364,
0.964285364, 3.126314747), ironing_timedep = c(2.559321793,
2.559321793, 2.292682967), TV_timedep = c(5.086074482, 5.086074482,
5.012631788), TV_dur = c(70, 240, 60), Dishwash_dur = c(0,
20, 10), Foodprep_dur = c(80, 150, 10), Cleaningdwelling_dur = c(0,
0, 10), Ironing_dur = c(0, 0, 0), Laundry_dur = c(0, 0, 0
), WorkType = c("RE", "RE", "RE"), DMSex = c(1, 2, 1), DVAge = c(69,
60, 36), DVHsize = c(2, 2, 4), Income = c(1500, 1500, 3500
), NumChild = c(0, 0, 2)), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"))
在lmtest::coeftest
中使用聚类鲁棒协方差矩阵sandwich::vcovCL
。您的数据不完整,所以我用 R 中内置的 iris
数据做了一个例子。
fit <- lm(Sepal.Length ~ ., iris)
library(lmtest);library(sandwich)
ct <- coeftest(fit, vcov.=vcovCL(fit, cluster= ~ Species))
ct
# t test of coefficients:
#
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 2.171266 0.314159 6.9114 1.438e-10 ***
# Sepal.Width 0.495889 0.121453 4.0830 7.344e-05 ***
# Petal.Length 0.829244 0.097682 8.4892 2.325e-14 ***
# Petal.Width -0.315155 0.110369 -2.8555 0.004933 **
# Speciesversicolor -0.723562 0.383713 -1.8857 0.061351 .
# Speciesvirginica -1.023498 0.528100 -1.9381 0.054571 .
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
如果出于某种原因您只需要标准误差,您可以这样做:
se <- ct[, 2]
se
# (Intercept) Sepal.Width Petal.Length Petal.Width Speciesversicolor Speciesvirginica
# 0.31415923 0.12145252 0.09768204 0.11036863 0.38371255 0.52809998
我想计算以下回归模型的 clustered standard errors。不知道怎么定义?我不知道如何将公式翻译并应用到 R?
我的回归模型:
model3<- lm(Dishwash_dur ~ work_minutes +
work_minutes_squared +
relevel(employment, ref = "FT")+
work_minutes*relevel(employment, ref = "FT")+
dishwas_betw +
DVAge +
DMSex+
DVHsize+
Income+
NumChild+
dishwas_betw * work_minutes+
dishwas_betw * work_minutes_squared,
data = df, weights=dishwas_timedep)
Sample data:
dput(df[1:3,])
df<-structure(list(serial = c(11011209, 11011209, 11011210), pnum = c(1,
2, 2), `diary day` = c(4, 4, 5), work_minutes = c(450, 450, 480
), work_hours = c(8, 8, 0), work_minutes_squared = c(230400,
230400, 0), cooking_betw = c(0.159090909, 0.371212121, 0.271428571
), dishwas_betw = c(NA, 0.28030303, 0.271428571), houseclean_betw = c(NA_real_,
NA_real_, NA_real_), laundry_betw = c(NA_real_, NA_real_, NA_real_
), ironing_betw = c(NA_real_, NA_real_, NA_real_), tv1_betw = c(0.227272727,
0.28030303, 0.088095238), tv2_betw = c(NA_real_, NA_real_, NA_real_
), tv3_betw = c(NA_real_, NA_real_, NA_real_), tv4_betw = c(0.242424242,
0.242424242, NA), tv5_betw = c(NA_real_, NA_real_, NA_real_),
tv6_betw = c(NA, NA, NA), tv7_betw = c(NA_real_, NA_real_,
NA_real_), employment = structure(c(1L, 2L, 2L), .Label = c("FT",
"PT"), class = "factor"), cooking_timedep = c(2.773774781,
2.773774781, 3.417989523), dishwas_timedep = c(1.99418589,
1.99418589, 2.625000906), houseclean_timedep = c(5.327815177,
5.327815177, 4.750929283), laundry_timedep = c(0.964285364,
0.964285364, 3.126314747), ironing_timedep = c(2.559321793,
2.559321793, 2.292682967), TV_timedep = c(5.086074482, 5.086074482,
5.012631788), TV_dur = c(70, 240, 60), Dishwash_dur = c(0,
20, 10), Foodprep_dur = c(80, 150, 10), Cleaningdwelling_dur = c(0,
0, 10), Ironing_dur = c(0, 0, 0), Laundry_dur = c(0, 0, 0
), WorkType = c("RE", "RE", "RE"), DMSex = c(1, 2, 1), DVAge = c(69,
60, 36), DVHsize = c(2, 2, 4), Income = c(1500, 1500, 3500
), NumChild = c(0, 0, 2)), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"))
在lmtest::coeftest
中使用聚类鲁棒协方差矩阵sandwich::vcovCL
。您的数据不完整,所以我用 R 中内置的 iris
数据做了一个例子。
fit <- lm(Sepal.Length ~ ., iris)
library(lmtest);library(sandwich)
ct <- coeftest(fit, vcov.=vcovCL(fit, cluster= ~ Species))
ct
# t test of coefficients:
#
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 2.171266 0.314159 6.9114 1.438e-10 ***
# Sepal.Width 0.495889 0.121453 4.0830 7.344e-05 ***
# Petal.Length 0.829244 0.097682 8.4892 2.325e-14 ***
# Petal.Width -0.315155 0.110369 -2.8555 0.004933 **
# Speciesversicolor -0.723562 0.383713 -1.8857 0.061351 .
# Speciesvirginica -1.023498 0.528100 -1.9381 0.054571 .
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
如果出于某种原因您只需要标准误差,您可以这样做:
se <- ct[, 2]
se
# (Intercept) Sepal.Width Petal.Length Petal.Width Speciesversicolor Speciesvirginica
# 0.31415923 0.12145252 0.09768204 0.11036863 0.38371255 0.52809998