如何从 plm FE 回归中获得 between 和 overall R2?
How to get between and overall R2 from plm FE regression?
有没有办法让 plm()
为我计算 R2 和整体 R2 并将它们包含在 summary()
输出中?
要阐明我在 R2 之间、整体和内部的意思,请参阅 StackExchange 上的此答案。
我的理解是plm只在R2内计算。
我是 运行 模型中的双向效应。
随机示例(改编自here):
library(plm)
# Create some random data
set.seed(1)
x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
y=x+fe+e
data=data.frame(y,x,id,ti)
# Get plm within R2
reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
summary(reg)$r.squared
我现在也想得到整体和R2之间:
# Pooled Version (overall R2)
reg1=lm(y~x)
summary(reg1)$r.squared
# Between R2
y.means=tapply(y,id,mean)[id]
x.means=tapply(x,id,mean)[id]
reg2=lm(y.means~x.means)
summary(reg3)$r.squared
{plm}
似乎无法报告总体或引用-非引用 R 平方值。您可以通过创建自定义 summary
和 print
方法来破解它:
summary.plm.full <- function (object, vcov = NULL, ...)
{
vcov_arg <- vcov
#add plm::: for plm functions so they are calllex correctly
model <- plm:::describe(object, "model")
effect <- plm:::describe(object, "effect")
random.method <- plm:::describe(object, "random.method")
object$r.squared <- c(rsq = r.squared(object),
adjrsq = r.squared(object, dfcor = TRUE),
# add the two new r squared terms here
rsq_overall = r.squared(object, model = "pooled"),
rsq_btw = r.squared(update(object, effect = "individual", model = "between")))
use.norm.chisq <- FALSE
if (model == "random")
use.norm.chisq <- TRUE
if (length(formula(object))[2] >= 2)
use.norm.chisq <- TRUE
if (model == "ht")
use.norm.chisq <- TRUE
object$fstatistic <- pwaldtest(object, test = ifelse(use.norm.chisq,
"Chisq", "F"), vcov = vcov_arg)
if (!is.null(vcov_arg)) {
if (is.matrix(vcov_arg))
rvcov <- vcov_arg
if (is.function(vcov_arg))
rvcov <- vcov_arg(object)
std.err <- sqrt(diag(rvcov))
}
else {
std.err <- sqrt(diag(stats::vcov(object)))
}
b <- coefficients(object)
z <- b/std.err
p <- if (use.norm.chisq) {
2 * pnorm(abs(z), lower.tail = FALSE)
}
else {
2 * pt(abs(z), df = object$df.residual, lower.tail = FALSE)
}
object$coefficients <- cbind(b, std.err, z, p)
colnames(object$coefficients) <- if (use.norm.chisq) {
c("Estimate", "Std. Error", "z-value", "Pr(>|z|)")
}
else {
c("Estimate", "Std. Error", "t-value", "Pr(>|t|)")
}
if (!is.null(vcov_arg)) {
object$rvcov <- rvcov
rvcov.name <- paste0(deparse(substitute(vcov)))
attr(object$rvcov, which = "rvcov.name") <- rvcov.name
}
object$df <- c(length(b), object$df.residual, length(object$aliased))
class(object) <- c("summary.plm.full", "plm", "panelmodel")
object
}
打印:
print.summary.plm.full <- function (x, digits = max(3, getOption("digits") - 2), width = getOption("width"),
subset = NULL, ...)
{
formula <- formula(x)
has.instruments <- (length(formula)[2] >= 2)
effect <- plm:::describe(x, "effect")
model <- plm:::describe(x, "model")
if (model != "pooling") {
cat(paste(plm:::effect.plm.list[effect], " ", sep = ""))
}
cat(paste(plm:::model.plm.list[model], " Model", sep = ""))
if (model == "random") {
ercomp <- describe(x, "random.method")
cat(paste(" \n (", random.method.list[ercomp], "'s transformation)\n",
sep = ""))
}
else {
cat("\n")
}
if (has.instruments) {
cat("Instrumental variable estimation\n")
if (model != "within") {
ivar <- plm:::describe(x, "inst.method")
cat(paste0(" (", plm:::inst.method.list[ivar], "'s transformation)\n"))
}
}
if (!is.null(x$rvcov)) {
cat("\nNote: Coefficient variance-covariance matrix supplied: ",
attr(x$rvcov, which = "rvcov.name"), "\n", sep = "")
}
cat("\nCall:\n")
print(x$call)
cat("\n")
pdim <- pdim(x)
print(pdim)
if (model %in% c("fd", "between")) {
cat(paste0("Observations used in estimation: ", nobs(x),
"\n"))
}
if (model == "random") {
cat("\nEffects:\n")
print(x$ercomp)
}
cat("\nResiduals:\n")
df <- x$df
rdf <- df[2L]
if (rdf > 5L) {
save.digits <- unlist(options(digits = digits))
on.exit(options(digits = save.digits))
print(plm:::sumres(x))
}
else if (rdf > 0L)
print(residuals(x), digits = digits)
if (rdf == 0L) {
cat("ALL", x$df[1L], "residuals are 0: no residual degrees of freedom!")
cat("\n")
}
if (any(x$aliased, na.rm = TRUE)) {
naliased <- sum(x$aliased, na.rm = TRUE)
cat("\nCoefficients: (", naliased, " dropped because of singularities)\n",
sep = "")
}
else cat("\nCoefficients:\n")
if (is.null(subset))
printCoefmat(coef(x), digits = digits)
else printCoefmat(coef(x)[subset, , drop = FALSE], digits = digits)
cat("\n")
cat(paste("Total Sum of Squares: ", signif(plm:::tss.plm(x), digits),
"\n", sep = ""))
cat(paste("Residual Sum of Squares: ", signif(deviance(x),
digits), "\n", sep = ""))
cat(paste("R-Squared: ", signif(x$r.squared[1], digits),
"\n", sep = ""))
cat(paste("Adj. R-Squared: ", signif(x$r.squared[2], digits),
"\n", sep = ""))
# add the new r squared terms here
cat(paste("Overall R-Squared: ", signif(x$r.squared[3], digits),
"\n", sep = ""))
cat(paste("Between R-Squared: ", signif(x$r.squared[4], digits),
"\n", sep = ""))
fstat <- x$fstatistic
if (names(fstat$statistic) == "F") {
cat(paste("F-statistic: ", signif(fstat$statistic), " on ",
fstat$parameter["df1"], " and ", fstat$parameter["df2"],
" DF, p-value: ", format.pval(fstat$p.value, digits = digits),
"\n", sep = ""))
}
else {
cat(paste("Chisq: ", signif(fstat$statistic), " on ",
fstat$parameter, " DF, p-value: ", format.pval(fstat$p.value,
digits = digits), "\n", sep = ""))
}
invisible(x)
}
现在如果我们使用自定义函数:
library(plm)
# Create some random data
set.seed(1)
x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
y=x+fe+e
data=data.frame(y,x,id,ti)
# Get plm within R2
reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
summary.plm.full(reg)
打印:
Twoways effects Within Model
Call:
plm(formula = y ~ x, data = data, effect = "twoways", model = "within",
index = c("id", "ti"))
Balanced Panel: n = 10, T = 10, N = 100
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-2.36060 -0.56664 -0.11085 0.56070 2.00869
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
x 1.12765 0.11306 9.9741 1.086e-15 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Total Sum of Squares: 157.21
Residual Sum of Squares: 70.071
R-Squared: 0.55428
Adj. R-Squared: 0.44842
Overall R-Squared: 0.33672
Between R-Squared: 0.17445
F-statistic: 99.4829 on 1 and 80 DF, p-value: 1.0856e-15
'within'估计量相当于最小二乘虚拟变量估计量,可以用OLS估计。这报告了总体 r-squared(...与上面 paqmo 的功能不同 - 也许他们可以澄清一下?)
lsdv<-lm(y~-1+x+as.factor(id)+as.factor(ti),data=data)
summary(lsdv)
请注意,x 的估计系数是相同的。
有没有办法让 plm()
为我计算 R2 和整体 R2 并将它们包含在 summary()
输出中?
要阐明我在 R2 之间、整体和内部的意思,请参阅 StackExchange 上的此答案。
我的理解是plm只在R2内计算。 我是 运行 模型中的双向效应。
随机示例(改编自here):
library(plm)
# Create some random data
set.seed(1)
x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
y=x+fe+e
data=data.frame(y,x,id,ti)
# Get plm within R2
reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
summary(reg)$r.squared
我现在也想得到整体和R2之间:
# Pooled Version (overall R2)
reg1=lm(y~x)
summary(reg1)$r.squared
# Between R2
y.means=tapply(y,id,mean)[id]
x.means=tapply(x,id,mean)[id]
reg2=lm(y.means~x.means)
summary(reg3)$r.squared
{plm}
似乎无法报告总体或引用-非引用 R 平方值。您可以通过创建自定义 summary
和 print
方法来破解它:
summary.plm.full <- function (object, vcov = NULL, ...)
{
vcov_arg <- vcov
#add plm::: for plm functions so they are calllex correctly
model <- plm:::describe(object, "model")
effect <- plm:::describe(object, "effect")
random.method <- plm:::describe(object, "random.method")
object$r.squared <- c(rsq = r.squared(object),
adjrsq = r.squared(object, dfcor = TRUE),
# add the two new r squared terms here
rsq_overall = r.squared(object, model = "pooled"),
rsq_btw = r.squared(update(object, effect = "individual", model = "between")))
use.norm.chisq <- FALSE
if (model == "random")
use.norm.chisq <- TRUE
if (length(formula(object))[2] >= 2)
use.norm.chisq <- TRUE
if (model == "ht")
use.norm.chisq <- TRUE
object$fstatistic <- pwaldtest(object, test = ifelse(use.norm.chisq,
"Chisq", "F"), vcov = vcov_arg)
if (!is.null(vcov_arg)) {
if (is.matrix(vcov_arg))
rvcov <- vcov_arg
if (is.function(vcov_arg))
rvcov <- vcov_arg(object)
std.err <- sqrt(diag(rvcov))
}
else {
std.err <- sqrt(diag(stats::vcov(object)))
}
b <- coefficients(object)
z <- b/std.err
p <- if (use.norm.chisq) {
2 * pnorm(abs(z), lower.tail = FALSE)
}
else {
2 * pt(abs(z), df = object$df.residual, lower.tail = FALSE)
}
object$coefficients <- cbind(b, std.err, z, p)
colnames(object$coefficients) <- if (use.norm.chisq) {
c("Estimate", "Std. Error", "z-value", "Pr(>|z|)")
}
else {
c("Estimate", "Std. Error", "t-value", "Pr(>|t|)")
}
if (!is.null(vcov_arg)) {
object$rvcov <- rvcov
rvcov.name <- paste0(deparse(substitute(vcov)))
attr(object$rvcov, which = "rvcov.name") <- rvcov.name
}
object$df <- c(length(b), object$df.residual, length(object$aliased))
class(object) <- c("summary.plm.full", "plm", "panelmodel")
object
}
打印:
print.summary.plm.full <- function (x, digits = max(3, getOption("digits") - 2), width = getOption("width"),
subset = NULL, ...)
{
formula <- formula(x)
has.instruments <- (length(formula)[2] >= 2)
effect <- plm:::describe(x, "effect")
model <- plm:::describe(x, "model")
if (model != "pooling") {
cat(paste(plm:::effect.plm.list[effect], " ", sep = ""))
}
cat(paste(plm:::model.plm.list[model], " Model", sep = ""))
if (model == "random") {
ercomp <- describe(x, "random.method")
cat(paste(" \n (", random.method.list[ercomp], "'s transformation)\n",
sep = ""))
}
else {
cat("\n")
}
if (has.instruments) {
cat("Instrumental variable estimation\n")
if (model != "within") {
ivar <- plm:::describe(x, "inst.method")
cat(paste0(" (", plm:::inst.method.list[ivar], "'s transformation)\n"))
}
}
if (!is.null(x$rvcov)) {
cat("\nNote: Coefficient variance-covariance matrix supplied: ",
attr(x$rvcov, which = "rvcov.name"), "\n", sep = "")
}
cat("\nCall:\n")
print(x$call)
cat("\n")
pdim <- pdim(x)
print(pdim)
if (model %in% c("fd", "between")) {
cat(paste0("Observations used in estimation: ", nobs(x),
"\n"))
}
if (model == "random") {
cat("\nEffects:\n")
print(x$ercomp)
}
cat("\nResiduals:\n")
df <- x$df
rdf <- df[2L]
if (rdf > 5L) {
save.digits <- unlist(options(digits = digits))
on.exit(options(digits = save.digits))
print(plm:::sumres(x))
}
else if (rdf > 0L)
print(residuals(x), digits = digits)
if (rdf == 0L) {
cat("ALL", x$df[1L], "residuals are 0: no residual degrees of freedom!")
cat("\n")
}
if (any(x$aliased, na.rm = TRUE)) {
naliased <- sum(x$aliased, na.rm = TRUE)
cat("\nCoefficients: (", naliased, " dropped because of singularities)\n",
sep = "")
}
else cat("\nCoefficients:\n")
if (is.null(subset))
printCoefmat(coef(x), digits = digits)
else printCoefmat(coef(x)[subset, , drop = FALSE], digits = digits)
cat("\n")
cat(paste("Total Sum of Squares: ", signif(plm:::tss.plm(x), digits),
"\n", sep = ""))
cat(paste("Residual Sum of Squares: ", signif(deviance(x),
digits), "\n", sep = ""))
cat(paste("R-Squared: ", signif(x$r.squared[1], digits),
"\n", sep = ""))
cat(paste("Adj. R-Squared: ", signif(x$r.squared[2], digits),
"\n", sep = ""))
# add the new r squared terms here
cat(paste("Overall R-Squared: ", signif(x$r.squared[3], digits),
"\n", sep = ""))
cat(paste("Between R-Squared: ", signif(x$r.squared[4], digits),
"\n", sep = ""))
fstat <- x$fstatistic
if (names(fstat$statistic) == "F") {
cat(paste("F-statistic: ", signif(fstat$statistic), " on ",
fstat$parameter["df1"], " and ", fstat$parameter["df2"],
" DF, p-value: ", format.pval(fstat$p.value, digits = digits),
"\n", sep = ""))
}
else {
cat(paste("Chisq: ", signif(fstat$statistic), " on ",
fstat$parameter, " DF, p-value: ", format.pval(fstat$p.value,
digits = digits), "\n", sep = ""))
}
invisible(x)
}
现在如果我们使用自定义函数:
library(plm)
# Create some random data
set.seed(1)
x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
y=x+fe+e
data=data.frame(y,x,id,ti)
# Get plm within R2
reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
summary.plm.full(reg)
打印:
Twoways effects Within Model
Call:
plm(formula = y ~ x, data = data, effect = "twoways", model = "within",
index = c("id", "ti"))
Balanced Panel: n = 10, T = 10, N = 100
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-2.36060 -0.56664 -0.11085 0.56070 2.00869
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
x 1.12765 0.11306 9.9741 1.086e-15 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Total Sum of Squares: 157.21
Residual Sum of Squares: 70.071
R-Squared: 0.55428
Adj. R-Squared: 0.44842
Overall R-Squared: 0.33672
Between R-Squared: 0.17445
F-statistic: 99.4829 on 1 and 80 DF, p-value: 1.0856e-15
'within'估计量相当于最小二乘虚拟变量估计量,可以用OLS估计。这报告了总体 r-squared(...与上面 paqmo 的功能不同 - 也许他们可以澄清一下?)
lsdv<-lm(y~-1+x+as.factor(id)+as.factor(ti),data=data)
summary(lsdv)
请注意,x 的估计系数是相同的。