可视化面板数据中两个变量之间的关系
Visualise the relation between two variables in panel data
我对 R
很熟悉,但对绘图不是很熟悉。我的面板数据如下:
library(plm)
library(dplyr)
data("EmplUK", package="plm")
EmplUK <- EmplUK %>%
group_by(firm, year) %>%
mutate(Vote = sample(c(0,1),1) ,
Vote_won = ifelse(Vote==1, sample(c(0,1),1),0))
# EDIT:
EmplUK <- pdata.frame(EmplUK , index=c("firm", "year"), drop.index = FALSE)
# A tibble: 1,031 x 9
# Groups: firm, year [1,031]
firm year sector emp wage capital output Vote Vote_won
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1977 7 5.04 13.2 0.589 95.7 1 0
2 1 1978 7 5.60 12.3 0.632 97.4 0 0
3 1 1979 7 5.01 12.8 0.677 99.6 1 1
4 1 1980 7 4.72 13.8 0.617 101. 1 1
5 1 1981 7 4.09 14.3 0.508 99.6 0 0
6 1 1982 7 3.17 14.9 0.423 98.6 0 0
7 1 1983 7 2.94 13.8 0.392 100. 0 0
8 2 1977 7 71.3 14.8 16.9 95.7 1 0
9 2 1978 7 70.6 14.1 17.2 97.4 1 1
10 2 1979 7 70.9 15.0 17.5 99.6 1 1
toplot <- plm(output ~ wage, data=EmplUK, model="within")
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
wage -0.707 0.143 -4.94 0.00000095 ***
我想通过可视化产出和工资之间的关系(以及可能拟合此类线性、二次、多项式)来评估面板数据中两个变量之间的最佳关系(线性、二次、多项式)。但是我对绘图超级陌生。
我正在寻找这样的东西 (source)(我从中得到拟合线的公式):
我试过如下开始:
plot(EmplUK$output,EmplUK$wage,type='l',col='red',main='Linear relationship')
但这给了我这个:
老实说,我不知道我在这里做什么。有没有人可以让我朝着正确的方向前进?
也许像这样使用 ggplot2
:
library(ggplot2)
ggplot(EmplUK, aes(output, wage)) +
geom_line(color = 'red') +
geom_smooth(size = 2) +
ggtitle('Linear relationship') +
theme_bw()
plm
有一个内置的 plot
方法 plm:::plot.plm
也显示固定效果。对于多项式分析,您可以使用 loess
模型的 yhat
和按公司 col
orize。所以两个图一起可以让你了解数据情况。
EmplUK <- transform(EmplUK, yhat=predict(loess(output ~ wage)))
op <- par(mfrow=c(1, 2), mar=c(4.5, 4, 3, 1))
plot(toplot) ## from `plm:::plot.plm`
plot(output ~ wage, EmplUK, type="p", pch=20, cex=.5, col=firm, ylim=range(EmplUK$yhat))
invisible(sapply(unique(EmplUK$firm), function(x)
lines(yhat ~ wage, EmplUK[EmplUK$firm == x, ], col=x, lwd=1)))
par(op)
当然loess
不能用因子变量;在 Cross Validated 上,他们建议 Semiparametric Nonlinear Mixed Effects model using the nlme
package 在混合模型上应用 LOESS。
我可能会用去中心化的数据来做。
demeaned_data <- EmplUK %>%
group_by(firm) %>%
mutate(across(c(output, wage), function(x)x-mean(x)))
ggplot(demeaned_data, aes(x=wage, y=output)) +
geom_point() +
geom_smooth(aes(colour="linear", fill="linear"),
method="lm",
formula=y ~ x, ) +
geom_smooth(aes(colour="quadratic", fill="quadratic"),
method="lm",
formula=y ~ x + I(x^2)) +
geom_smooth(aes(colour="cubic", fill="cubic"),
method="lm",
formula=y ~ x + I(x^2) + I(x^3)) +
scale_fill_brewer(palette="Set1") +
scale_colour_brewer(palette="Set1") +
theme_classic() +
labs(colour="Functional Form", fill="Functional Form")
另一种方法是使用 OLS 和公司虚拟变量来估计模型,然后您可以获得每个公司的预测并分别绘制它们。
library(ggeffects)
data("EmplUK", package="plm")
EmplUK <- EmplUK %>% mutate(firm = as.factor(firm))
m1 <- lm(output ~ wage + firm, data=EmplUK )
m2 <- lm(output ~ wage + I(wage^2) + firm, data=EmplUK )
m3 <- lm(output ~ wage + I(wage^2) + I(wage^3) + firm, data=EmplUK )
p1 <- ggpredict(m1, terms=c("wage", "firm")) %>%
mutate(form="linear") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
p2 <- ggpredict(m2, terms=c("wage", "firm")) %>%
mutate(form="quadratic") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
p3 <- ggpredict(m3, terms=c("wage", "firm")) %>%
mutate(form="cubic") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
ggplot() +
geom_line(data=p1, aes(x=wage, y=output, colour="linear")) +
geom_line(data=p2, aes(x=wage, y=output, colour="quadratic")) +
geom_line(data=p3, aes(x=wage, y=output, colour="cubic")) +
geom_point(data=EmplUK, aes(x=wage, y=output)) +
facet_wrap(~firm) +
theme_bw() +
labs(colour="Functional\nForm")
我对 R
很熟悉,但对绘图不是很熟悉。我的面板数据如下:
library(plm)
library(dplyr)
data("EmplUK", package="plm")
EmplUK <- EmplUK %>%
group_by(firm, year) %>%
mutate(Vote = sample(c(0,1),1) ,
Vote_won = ifelse(Vote==1, sample(c(0,1),1),0))
# EDIT:
EmplUK <- pdata.frame(EmplUK , index=c("firm", "year"), drop.index = FALSE)
# A tibble: 1,031 x 9
# Groups: firm, year [1,031]
firm year sector emp wage capital output Vote Vote_won
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1977 7 5.04 13.2 0.589 95.7 1 0
2 1 1978 7 5.60 12.3 0.632 97.4 0 0
3 1 1979 7 5.01 12.8 0.677 99.6 1 1
4 1 1980 7 4.72 13.8 0.617 101. 1 1
5 1 1981 7 4.09 14.3 0.508 99.6 0 0
6 1 1982 7 3.17 14.9 0.423 98.6 0 0
7 1 1983 7 2.94 13.8 0.392 100. 0 0
8 2 1977 7 71.3 14.8 16.9 95.7 1 0
9 2 1978 7 70.6 14.1 17.2 97.4 1 1
10 2 1979 7 70.9 15.0 17.5 99.6 1 1
toplot <- plm(output ~ wage, data=EmplUK, model="within")
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
wage -0.707 0.143 -4.94 0.00000095 ***
我想通过可视化产出和工资之间的关系(以及可能拟合此类线性、二次、多项式)来评估面板数据中两个变量之间的最佳关系(线性、二次、多项式)。但是我对绘图超级陌生。
我正在寻找这样的东西 (source)(我从中得到拟合线的公式):
我试过如下开始:
plot(EmplUK$output,EmplUK$wage,type='l',col='red',main='Linear relationship')
但这给了我这个:
老实说,我不知道我在这里做什么。有没有人可以让我朝着正确的方向前进?
也许像这样使用 ggplot2
:
library(ggplot2)
ggplot(EmplUK, aes(output, wage)) +
geom_line(color = 'red') +
geom_smooth(size = 2) +
ggtitle('Linear relationship') +
theme_bw()
plm
有一个内置的 plot
方法 plm:::plot.plm
也显示固定效果。对于多项式分析,您可以使用 loess
模型的 yhat
和按公司 col
orize。所以两个图一起可以让你了解数据情况。
EmplUK <- transform(EmplUK, yhat=predict(loess(output ~ wage)))
op <- par(mfrow=c(1, 2), mar=c(4.5, 4, 3, 1))
plot(toplot) ## from `plm:::plot.plm`
plot(output ~ wage, EmplUK, type="p", pch=20, cex=.5, col=firm, ylim=range(EmplUK$yhat))
invisible(sapply(unique(EmplUK$firm), function(x)
lines(yhat ~ wage, EmplUK[EmplUK$firm == x, ], col=x, lwd=1)))
par(op)
当然loess
不能用因子变量;在 Cross Validated 上,他们建议 Semiparametric Nonlinear Mixed Effects model using the nlme
package 在混合模型上应用 LOESS。
我可能会用去中心化的数据来做。
demeaned_data <- EmplUK %>%
group_by(firm) %>%
mutate(across(c(output, wage), function(x)x-mean(x)))
ggplot(demeaned_data, aes(x=wage, y=output)) +
geom_point() +
geom_smooth(aes(colour="linear", fill="linear"),
method="lm",
formula=y ~ x, ) +
geom_smooth(aes(colour="quadratic", fill="quadratic"),
method="lm",
formula=y ~ x + I(x^2)) +
geom_smooth(aes(colour="cubic", fill="cubic"),
method="lm",
formula=y ~ x + I(x^2) + I(x^3)) +
scale_fill_brewer(palette="Set1") +
scale_colour_brewer(palette="Set1") +
theme_classic() +
labs(colour="Functional Form", fill="Functional Form")
另一种方法是使用 OLS 和公司虚拟变量来估计模型,然后您可以获得每个公司的预测并分别绘制它们。
library(ggeffects)
data("EmplUK", package="plm")
EmplUK <- EmplUK %>% mutate(firm = as.factor(firm))
m1 <- lm(output ~ wage + firm, data=EmplUK )
m2 <- lm(output ~ wage + I(wage^2) + firm, data=EmplUK )
m3 <- lm(output ~ wage + I(wage^2) + I(wage^3) + firm, data=EmplUK )
p1 <- ggpredict(m1, terms=c("wage", "firm")) %>%
mutate(form="linear") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
p2 <- ggpredict(m2, terms=c("wage", "firm")) %>%
mutate(form="quadratic") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
p3 <- ggpredict(m3, terms=c("wage", "firm")) %>%
mutate(form="cubic") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
ggplot() +
geom_line(data=p1, aes(x=wage, y=output, colour="linear")) +
geom_line(data=p2, aes(x=wage, y=output, colour="quadratic")) +
geom_line(data=p3, aes(x=wage, y=output, colour="cubic")) +
geom_point(data=EmplUK, aes(x=wage, y=output)) +
facet_wrap(~firm) +
theme_bw() +
labs(colour="Functional\nForm")