在 R 中创建时变和时不变的上下文变量
Create time-varying and time-invariant contextual variables in R
我正在尝试使用 2002-2018 年的 ESS 调查数据做一个纵向多层次模型。我想看看宏观经济表现对政治信任的影响。为此,我使用 'within-between' 方法同时对横截面和纵向分量进行建模。
现在我有一个数据集,其中添加了每个国家/地区-年份的实际 inflation、GDP 增长率和失业率。为了创建一个不随时间变化的组件,我必须针对整个时间序列为每个国家创建一个变量(GDP 增长、inflation、失业率),例如奥地利 2002-2018 年的平均 GDP 增长。但是,当我运行下面的代码时,它创建的变量是上下文变量在整个数据集上的平均值,而不是各个国家/地区的平均值。
# Asker's attempt at a within-between decomposition: group the rows by
# country, then for each macro indicator store the group mean (`*_mean`) and
# each row's deviation from that mean (`*_diff`).
# NOTE(review): the sample table shown after this snippet spells the columns
# `GDPGrowth` and `Unemployment`, while this code refers to `GDPgrowth` and
# `unemployment` -- confirm the real column names.
# NOTE(review): the result stays grouped by `country` because there is no
# ungroup() at the end of the pipeline.
data<-data%>%
group_by(country)%>%
mutate( inflation_mean = mean(inflation, na.rm = TRUE),
inflation_diff = inflation - inflation_mean,
growth_mean = mean(GDPgrowth, na.rm = TRUE),
growth_diff = GDPgrowth - growth_mean,
unemployment_mean = mean(unemployment,na.rm = TRUE),
unemployment_diff = unemployment - unemployment_mean)
这是我的数据框的示例
# country year country_year trust inflation GDPGrowth Unemployment
# 1 Austria 2002 AT2002 4 2.2 4.2 4
# 2 Austria 2002 AT2002 9 2.2 4.2 4
# 55 Belgium 2002 BE2002 7 1.7 2.5 6
# 56 Belgium 2002 BE2002 3 1.7 2.5 6
# 91 Austria 2005 AT2005 2 3.4 2.9 3
# 91 Austria 2005 AT2005 6 3.4 2.9 3
# 141 Belgium 2005 BE2005 5 0.5 1.6 5
# 142 Belgium 2005 BE2005 9 0.5 1.6 5
# Six-row sample of the asker's data (looks like dput() output); every row has
# cntry "AT" and year 2003. Points worth noticing in the dump:
#   * `unemployment` is stored as character strings, not as numbers;
#   * the inflation and growth series are named `wb_inflation` / `wb_growth`;
#   * the object is a grouped_df whose grouping column is literally named
#     `fdata1$cntry`.
structure(list(idno = structure(c(1, 2, 3, 4, 6, 7), format.stata = "%12.0g"),
cntry = structure(c("AT", "AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"),
essround = structure(c(1, 1, 1, 1, 1, 1), format.stata = "%12.0g"),
pspwght = structure(c(0.9409328155361, 0.47046640776805,
1.39215496052674, 1.38216297191755, 1.43776626993043, 1.39215496052674
), format.stata = "%12.0g"), agea = structure(c(54, 50, 63,
44, 41, 63), format.stata = "%13.0g"), gndr = structure(c(1,
1, 2, 1, 2, 2), format.stata = "%12.0g"), eduyrs = structure(c(11,
14, 9, 18, 15, 11), format.stata = "%12.0g"), mnactic = structure(c(1,
1, 6, 1, 1, 6), format.stata = "%41.0g"), lrscale = structure(c(6,
6, 5, 5, 5, NA), format.stata = "%12.0g"), rlgdgr = structure(c(8,
5, 7, 7, 10, 3), format.stata = "%20.0g"), dscrgrp = structure(c(2,
1, 2, 2, 2, 1), format.stata = "%12.0g"), dscretn = structure(c(0,
0, 0, 0, 0, 0), format.stata = "%12.0g"), ctzcntr = structure(c(1,
1, 1, 1, 1, 1), format.stata = "%12.0g"), blgetmg = structure(c(2,
2, NA, 2, 2, 2), format.stata = "%12.0g"), hincfel = structure(c(1,
3, 2, 1, 1, 3), format.stata = "%36.0g"), trstprl = structure(c(9,
0, 6, 8, 6, 0), format.stata = "%15.0g"), inwyr = structure(c(2003,
2003, 2003, 2003, 2003, 2003), format.stata = "%13.0g"),
inwyys = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), format.stata = "%13.0g"), inwyye = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), format.stata = "%13.0g"),
education = structure(c(11, 14, 9, 18, 15, 11), format.stata = "%12.0g"),
female = structure(c(0, 0, 1, 0, 1, 1), format.stata = "%12.0g"),
employement = structure(c(1, 1, 6, 1, 1, 6), format.stata = "%41.0g"),
age = structure(c(54, 50, 63, 44, 41, 63), format.stata = "%13.0g"),
year = c(2003, 2003, 2003, 2003, 2003, 2003), year1 = c(2010,
2010, 2010, 2010, 2010, 2010), year0 = c(1, 1, 1, 1, 1, 1
), cntry_year = c("AT 2003", "AT 2003", "AT 2003", "AT 2003",
"AT 2003", "AT 2003"), n = c(2257L, 2257L, 2257L, 2257L,
2257L, 2257L), year_lag = c(2002, 2002, 2002, 2002, 2002,
2002), gini_disp = c(26.7, 26.7, 26.7, 26.7, 26.7, 26.7),
unemployment = c("4.849999905", "4.849999905", "4.849999905",
"4.849999905", "4.849999905", "4.849999905"), corruption = c(1.966434,
1.966434, 1.966434, 1.966434, 1.966434, 1.966434), wb_growth = structure(c(1.65155392186669,
1.65155392186669, 1.65155392186669, 1.65155392186669, 1.65155392186669,
1.65155392186669), label = "GDP growth (annual %)"), wb_inflation = structure(c(1.81035787764132,
1.81035787764132, 1.81035787764132, 1.81035787764132, 1.81035787764132,
1.81035787764132), label = "Inflation, consumer prices (annual %)"),
old_demo = c(1, 1, 1, 1, 1, 1), gini_mean = c(28.9443587367257,
28.9443587367257, 28.9443587367257, 28.9443587367257, 28.9443587367257,
28.9443587367257), gini_diff = c(-2.24435873672569, -2.24435873672569,
-2.24435873672569, -2.24435873672569, -2.24435873672569,
-2.24435873672569), inflation_mean = c(2.36193292302435,
2.36193292302435, 2.36193292302435, 2.36193292302435, 2.36193292302435, 2.36193292302435), inflation_diff = structure(c(-0.551575045383031,
-0.551575045383031, -0.551575045383031, -0.551575045383031,
-0.551575045383031, -0.551575045383031), label = "Inflation, consumer prices (annual %)"),
growth_mean = c(2.11454728111128, 2.11454728111128, 2.11454728111128,
2.11454728111128, 2.11454728111128, 2.11454728111128), growth_diff = structure(c(-0.462993359244594,
-0.462993359244594, -0.462993359244594, -0.462993359244594,
-0.462993359244594, -0.462993359244594), label = "GDP growth (annual %)"),
corruption_mean = c(1.26648259354364, 1.26648259354364, 1.26648259354364,
1.26648259354364, 1.26648259354364, 1.26648259354364), corruption_diff = c(0.699951406456357,
0.699951406456357, 0.699951406456357, 0.699951406456357,
0.699951406456357, 0.699951406456357), `fdata1$cntry` = structure(c("AT",
"AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"), country = structure(c("AT",
"AT", "AT", "AT", "AT", "AT"), format.stata = "%2s")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
`fdata1$cntry` = structure("AT", format.stata = "%2s"), .rows = structure(list(
1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
# Attach the tidyverse. library() stops with an error when the package is
# missing, whereas require() only returns FALSE and lets later calls fail with
# a less helpful message.
library(tidyverse)

# Replace the two-letter country code in `cntry` with the English country name.
# The lookup comes from countrycode::codelist; clean_names() turns its column
# names into snake_case so `country_name_en` can be selected. merge() with
# by.x / by.y keeps only the rows whose `cntry` code is found in the lookup.
data <- mydatacomp %>%
  merge(
    y = countrycode::codelist %>%
      janitor::clean_names() %>%
      select(
        country_name = country_name_en,
        appr = iso2c,
        cont = continent
      ),
    by.x = "cntry",
    by.y = "appr"
  ) %>%
  as_tibble() %>%
  mutate(cntry = country_name)
# Within-between decomposition of the macro indicators, one row per distinct
# country-year combination in the output.
#   *_mean : per-country mean (the time-invariant, "between" component)
#   *_diff : deviation from that mean (the time-varying, "within" component)
# `unemployment` is converted with as.numeric() before averaging.
# NOTE(review): the means are computed before the rows are de-duplicated, so
# a country-year that appears on more rows weighs more in the country mean;
# de-duplicate first if an unweighted mean over years is intended.
data %>%
  mutate(unemployment = as.numeric(unemployment)) %>%
  select(cntry, year, wb_inflation, wb_growth, unemployment) %>%
  group_by(cntry) %>%
  mutate(
    inflation_mean = mean(wb_inflation, na.rm = TRUE),
    inflation_diff = wb_inflation - inflation_mean,
    growth_mean = mean(wb_growth, na.rm = TRUE),
    growth_diff = wb_growth - growth_mean,
    unemploy_mean = mean(unemployment, na.rm = TRUE),
    unemploy_diff = unemployment - unemploy_mean
  ) %>%
  # Drop the grouping so later verbs do not silently operate per country.
  ungroup() %>%
  # distinct() with no arguments de-duplicates on all columns; it replaces the
  # superseded distinct_all().
  distinct() %>%
  filter(!is.na(year)) %>%
  arrange(cntry, desc(year))
我正在尝试使用 2002-2018 年的 ESS 调查数据做一个纵向多层次模型。我想看看宏观经济表现对政治信任的影响。为此,我使用 'within-between' 方法同时对横截面和纵向分量进行建模。
现在我有一个数据集,其中添加了每个国家/地区-年份的实际 inflation、GDP 增长率和失业率。为了创建一个不随时间变化的组件,我必须针对整个时间序列为每个国家创建一个变量(GDP 增长、inflation、失业率),例如奥地利 2002-2018 年的平均 GDP 增长。但是,当我运行下面的代码时,它创建的变量是上下文变量在整个数据集上的平均值,而不是各个国家/地区的平均值。
# Asker's attempt at a within-between decomposition: group the rows by
# country, then for each macro indicator store the group mean (`*_mean`) and
# each row's deviation from that mean (`*_diff`).
# NOTE(review): the sample table shown after this snippet spells the columns
# `GDPGrowth` and `Unemployment`, while this code refers to `GDPgrowth` and
# `unemployment` -- confirm the real column names.
# NOTE(review): the result stays grouped by `country` because there is no
# ungroup() at the end of the pipeline.
data<-data%>%
group_by(country)%>%
mutate( inflation_mean = mean(inflation, na.rm = TRUE),
inflation_diff = inflation - inflation_mean,
growth_mean = mean(GDPgrowth, na.rm = TRUE),
growth_diff = GDPgrowth - growth_mean,
unemployment_mean = mean(unemployment,na.rm = TRUE),
unemployment_diff = unemployment - unemployment_mean)
这是我的数据框的示例
# country year country_year trust inflation GDPGrowth Unemployment
# 1 Austria 2002 AT2002 4 2.2 4.2 4
# 2 Austria 2002 AT2002 9 2.2 4.2 4
# 55 Belgium 2002 BE2002 7 1.7 2.5 6
# 56 Belgium 2002 BE2002 3 1.7 2.5 6
# 91 Austria 2005 AT2005 2 3.4 2.9 3
# 91 Austria 2005 AT2005 6 3.4 2.9 3
# 141 Belgium 2005 BE2005 5 0.5 1.6 5
# 142 Belgium 2005 BE2005 9 0.5 1.6 5
# Six-row sample of the asker's data (looks like dput() output); every row has
# cntry "AT" and year 2003. Points worth noticing in the dump:
#   * `unemployment` is stored as character strings, not as numbers;
#   * the inflation and growth series are named `wb_inflation` / `wb_growth`;
#   * the object is a grouped_df whose grouping column is literally named
#     `fdata1$cntry`.
structure(list(idno = structure(c(1, 2, 3, 4, 6, 7), format.stata = "%12.0g"),
cntry = structure(c("AT", "AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"),
essround = structure(c(1, 1, 1, 1, 1, 1), format.stata = "%12.0g"),
pspwght = structure(c(0.9409328155361, 0.47046640776805,
1.39215496052674, 1.38216297191755, 1.43776626993043, 1.39215496052674
), format.stata = "%12.0g"), agea = structure(c(54, 50, 63,
44, 41, 63), format.stata = "%13.0g"), gndr = structure(c(1,
1, 2, 1, 2, 2), format.stata = "%12.0g"), eduyrs = structure(c(11,
14, 9, 18, 15, 11), format.stata = "%12.0g"), mnactic = structure(c(1,
1, 6, 1, 1, 6), format.stata = "%41.0g"), lrscale = structure(c(6,
6, 5, 5, 5, NA), format.stata = "%12.0g"), rlgdgr = structure(c(8,
5, 7, 7, 10, 3), format.stata = "%20.0g"), dscrgrp = structure(c(2,
1, 2, 2, 2, 1), format.stata = "%12.0g"), dscretn = structure(c(0,
0, 0, 0, 0, 0), format.stata = "%12.0g"), ctzcntr = structure(c(1,
1, 1, 1, 1, 1), format.stata = "%12.0g"), blgetmg = structure(c(2,
2, NA, 2, 2, 2), format.stata = "%12.0g"), hincfel = structure(c(1,
3, 2, 1, 1, 3), format.stata = "%36.0g"), trstprl = structure(c(9,
0, 6, 8, 6, 0), format.stata = "%15.0g"), inwyr = structure(c(2003,
2003, 2003, 2003, 2003, 2003), format.stata = "%13.0g"),
inwyys = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), format.stata = "%13.0g"), inwyye = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), format.stata = "%13.0g"),
education = structure(c(11, 14, 9, 18, 15, 11), format.stata = "%12.0g"),
female = structure(c(0, 0, 1, 0, 1, 1), format.stata = "%12.0g"),
employement = structure(c(1, 1, 6, 1, 1, 6), format.stata = "%41.0g"),
age = structure(c(54, 50, 63, 44, 41, 63), format.stata = "%13.0g"),
year = c(2003, 2003, 2003, 2003, 2003, 2003), year1 = c(2010,
2010, 2010, 2010, 2010, 2010), year0 = c(1, 1, 1, 1, 1, 1
), cntry_year = c("AT 2003", "AT 2003", "AT 2003", "AT 2003",
"AT 2003", "AT 2003"), n = c(2257L, 2257L, 2257L, 2257L,
2257L, 2257L), year_lag = c(2002, 2002, 2002, 2002, 2002,
2002), gini_disp = c(26.7, 26.7, 26.7, 26.7, 26.7, 26.7),
unemployment = c("4.849999905", "4.849999905", "4.849999905",
"4.849999905", "4.849999905", "4.849999905"), corruption = c(1.966434,
1.966434, 1.966434, 1.966434, 1.966434, 1.966434), wb_growth = structure(c(1.65155392186669,
1.65155392186669, 1.65155392186669, 1.65155392186669, 1.65155392186669,
1.65155392186669), label = "GDP growth (annual %)"), wb_inflation = structure(c(1.81035787764132,
1.81035787764132, 1.81035787764132, 1.81035787764132, 1.81035787764132,
1.81035787764132), label = "Inflation, consumer prices (annual %)"),
old_demo = c(1, 1, 1, 1, 1, 1), gini_mean = c(28.9443587367257,
28.9443587367257, 28.9443587367257, 28.9443587367257, 28.9443587367257,
28.9443587367257), gini_diff = c(-2.24435873672569, -2.24435873672569,
-2.24435873672569, -2.24435873672569, -2.24435873672569,
-2.24435873672569), inflation_mean = c(2.36193292302435,
2.36193292302435, 2.36193292302435, 2.36193292302435, 2.36193292302435, 2.36193292302435), inflation_diff = structure(c(-0.551575045383031,
-0.551575045383031, -0.551575045383031, -0.551575045383031,
-0.551575045383031, -0.551575045383031), label = "Inflation, consumer prices (annual %)"),
growth_mean = c(2.11454728111128, 2.11454728111128, 2.11454728111128,
2.11454728111128, 2.11454728111128, 2.11454728111128), growth_diff = structure(c(-0.462993359244594,
-0.462993359244594, -0.462993359244594, -0.462993359244594,
-0.462993359244594, -0.462993359244594), label = "GDP growth (annual %)"),
corruption_mean = c(1.26648259354364, 1.26648259354364, 1.26648259354364,
1.26648259354364, 1.26648259354364, 1.26648259354364), corruption_diff = c(0.699951406456357,
0.699951406456357, 0.699951406456357, 0.699951406456357,
0.699951406456357, 0.699951406456357), `fdata1$cntry` = structure(c("AT",
"AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"), country = structure(c("AT",
"AT", "AT", "AT", "AT", "AT"), format.stata = "%2s")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
`fdata1$cntry` = structure("AT", format.stata = "%2s"), .rows = structure(list(
1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
# Attach the tidyverse. library() stops with an error when the package is
# missing, whereas require() only returns FALSE and lets later calls fail with
# a less helpful message.
library(tidyverse)

# Replace the two-letter country code in `cntry` with the English country name.
# The lookup comes from countrycode::codelist; clean_names() turns its column
# names into snake_case so `country_name_en` can be selected. merge() with
# by.x / by.y keeps only the rows whose `cntry` code is found in the lookup.
data <- mydatacomp %>%
  merge(
    y = countrycode::codelist %>%
      janitor::clean_names() %>%
      select(
        country_name = country_name_en,
        appr = iso2c,
        cont = continent
      ),
    by.x = "cntry",
    by.y = "appr"
  ) %>%
  as_tibble() %>%
  mutate(cntry = country_name)
# Within-between decomposition of the macro indicators, one row per distinct
# country-year combination in the output.
#   *_mean : per-country mean (the time-invariant, "between" component)
#   *_diff : deviation from that mean (the time-varying, "within" component)
# `unemployment` is converted with as.numeric() before averaging.
# NOTE(review): the means are computed before the rows are de-duplicated, so
# a country-year that appears on more rows weighs more in the country mean;
# de-duplicate first if an unweighted mean over years is intended.
data %>%
  mutate(unemployment = as.numeric(unemployment)) %>%
  select(cntry, year, wb_inflation, wb_growth, unemployment) %>%
  group_by(cntry) %>%
  mutate(
    inflation_mean = mean(wb_inflation, na.rm = TRUE),
    inflation_diff = wb_inflation - inflation_mean,
    growth_mean = mean(wb_growth, na.rm = TRUE),
    growth_diff = wb_growth - growth_mean,
    unemploy_mean = mean(unemployment, na.rm = TRUE),
    unemploy_diff = unemployment - unemploy_mean
  ) %>%
  # Drop the grouping so later verbs do not silently operate per country.
  ungroup() %>%
  # distinct() with no arguments de-duplicates on all columns; it replaces the
  # superseded distinct_all().
  distinct() %>%
  filter(!is.na(year)) %>%
  arrange(cntry, desc(year))