Bayesian Ordered Logit - 尝试根据模态个体绘制随时间变化的预测 y
Bayesian Ordered Logit - Trying to plot predicted y over time based on a modal individual
我有一个数据集,其中结合了多年来对不同国家的多项调查。我的因变量 (lrparty) 是根据调查受访者的政党的意识形态立场(范围从 0 到 10)。我有几个自变量,例如受访者的年龄、性别、教育程度、党派偏见和收入。
然后,对于每一方和每项调查,我想根据模态个体(例如,年龄 = 31,女性 = 1,教育 = 2,收入 = 2,和党派 = 1) 随着时间的推移。因此,图表看起来像:x 轴 = 年; y 轴 = 根据模态个体对 lrparty 的预测值。
总而言之,这些是我正在尝试做的阶段:
1. 估计模型:
政党 (lrparty) 位置的有序对数,对受访者的性别、年龄、教育、收入和党派关系进行回归。
进行后抽。
预测模态受访者的派对安排(例如,500 抽奖)
然后,我希望得到一个如下所示的数据集:
年份、调查、国家/地区、党(cmp 代码)、%missing placements、x1:x500(来自抽奖)
我将从该数据集中生成我的图表。例如,对于英国,根据 CSES 调查。
为了弄清楚代码,我开始只使用一项调查 (cses)、一个国家 (uk) 和一个政党 (conservatives),如下面的代码所示。但是我不知道如何从我在代码中的位置到达我想要的情节(如上所述)。
library(rstan)
library(tidyverse)
library(brms)
library(ggplot2)
library(ggthemes)
library(ggmcmc)
## Data:
load("pbrands.RData")
## Keeping only country = uk; survey = cses; party = conservatives
uk_cses_con = pbrands %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs) %>%
filter(survey == "cses") %>%
filter(country == "uk") %>%
filter(cmp == 51620)
## Conducting a Bayesian ordered logit model
fit <- brm(lrparty ~ age + income + education + female + partisan,
data = uk_cses_con, family = "cumulative", chains = 4, iter = 1000)
## Trace and Density Plots for MCMC Samples
plot(fit)
## Posterior Predictive Checks
pp_check(fit)
## Getting variables' modes:
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
getmode(uk_cses_con$age)
getmode(uk_cses_con$female)
getmode(uk_cses_con$education)
getmode(uk_cses_con$income)
getmode(uk_cses_con$partisan)
## Creating the data frame for the modal individual
newavg <- data.frame(age = 31, female = 1, education = 2, income = 2,
partisan = 0, years = uk_cses_con$year)
## predict response for new data
pred <- predict(fit, newdata = newavg)
# extract posterior samples of population-level effects
samples1 <- posterior_samples(fit)
## Display marginal effects of predictors
marginal <- marginal_effects(fit)
## Plot predicted lrparty (my dependent variable) over time (with error:
confidence interval) based on the modal respondent (age = 31, female = 1,
education = 0, income = 0, partisan = 0)
##?
提前致谢!
好的。经过几次尝试和错误尝试,我想出了代码。由于其他人可能会感兴趣,因此我在下面发布代码。
## Packages
install.packages(c("bmrs", "coda", "mvtnorm", "devtools"))
library(devtools)
library(tidyverse)
library(brms)
## Loading the data
load('~/Data/mydata.RData')
## Keeping the variables of our interest
mydata = mydata %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs)
## Function for calculating modes
getmode <- function(v) {
uniqv = unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
## Finding Modal respondents by country, survey, and party:
## Modes by country
mode_by_country = mydata %>%
group_by(country) %>%
mutate(modal_age = getmode(na.omit(age))) %>%
mutate(modal_inc = getmode(na.omit(income))) %>%
mutate(modal_female = getmode(na.omit(female))) %>%
mutate(modal_edu = getmode(na.omit(education))) %>%
mutate(modal_partisan = getmode(na.omit(partisan))) %>%
filter(!duplicated(country))
mode_by_country = mode_by_country[ , c(9, 14:18)]
mode_by_country = mode_by_country[-40, ]
## Build object to receive the information we want to store
runner <- c()
pred = matrix(NA, 2000, 11)
yhat = matrix(NA, 2000, 1)
###### Conducting the model for UK with two parties #########
uk = mydata %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs) %>%
filter(survey == "cses") %>%
filter(country == "uk") %>%
filter(cmp == 51320 | cmp == 51620)
## Finding how many regressions will be conducted
reglength <- length(unique(uk$survey)) * length(unique(uk$year)) * length(unique(uk$cmp))
## Creating our modal British individual based on mode_by_country
mode_by_country[mode_by_country$country == "uk", c(2:6)]
newavg <- data.frame(age = 35, income = 2, female = 1, education = 2, partisan = 0)
## Loop to conduct the ordered logit in rstan, using iter=1000, and chains=4
for(p in na.omit(unique(uk$cmp))){
hold <- uk[uk$cmp == p, ]
country <- hold$country[1]
for(s in na.omit(unique(hold$survey))){
hold1 <- hold[hold$survey == s, ]
for(y in na.omit(unique(hold1$year))){
mod <- brm(lrparty ~ age + female + education + income + partisan, data = hold1[hold1$year == y, ], family = "cumulative", chains = 4, iter = 1000)
for(i in 1:2000) {
pred[i,] <- predict(mod, newdata = newavg, probs = c(0.025, 0.975), summary=TRUE)
yhat[i] <- sum(pred[i, ] * c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))
}
newData <- data.frame(country, p, s, y, pred, yhat)
newData$m <- mean(newData$yhat)
newData$sd <- sd(newData$yhat)
newData$lower <- newData$m - 1.96*newData$sd
newData$upper <- newData$m + 1.96*newData$sd
runner <- rbind(runner, newData)
}
}
}
## Keeping unique values within dataset
uniqdata = runner %>%
filter(!duplicated(m))
## Creating the Figure
uniqdata2 <- uniqdata[, c(1:4, 17:20)]
uniqdata3 <- uniqdata2 %>%
gather(variable, value, -(y:p)) %>%
unite(temp, p, variable) %>%
spread(temp, value)
uniqdata3 = uniqdata3[ , -c(3,6,8,11)]
names(uniqdata3)[3:8] = c("lower_lab", "m_lab", "upper_lab", "lower_con", "m_con", "upper_con")
uniqdata3[3:8] = as.numeric(unlist(uniqdata3[3:8]))
## Plot: Predicted Party Ideological Placement for Modal British Respondent
ggplot(uniqdata3, aes(x = (y))) + geom_line(aes(y = m_lab, colour = "Labor")) + geom_ribbon(aes(ymin = lower_lab,ymax = upper_lab,
linetype=NA), alpha = .25) +
geom_line(aes(y = m_con, color = "Conservatives")) +
geom_ribbon(aes(ymin = lower_con,
ymax = upper_con,
linetype=NA), alpha = .25) +
theme_bw() +
theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5)) + labs(title = "Predicted Party Ideological Placement for Modal British Respondent \n Survey = CSES") + theme(plot.title = element_text(color="black", size=20, face="bold.italic"), axis.title.x = element_text(color="black", size=15, face="italic"), axis.title.y = element_text(color="black", size=15, face="italic")) +
theme(legend.title = element_blank()) +
theme(axis.text.x = element_text(color="black", size= 12.5), axis.text.y = element_text(color="black", size=12.5)) + theme(legend.text = element_text(size=15)) + scale_x_continuous(name="Year", breaks=seq(1997, 2005, 2)) + scale_y_continuous(name="Left-Right Party Position", limits=c(0, 10))
我有一个数据集,其中结合了多年来对不同国家的多项调查。我的因变量 (lrparty) 是根据调查受访者的政党的意识形态立场(范围从 0 到 10)。我有几个自变量,例如受访者的年龄、性别、教育程度、党派偏见和收入。
然后,对于每一方和每项调查,我想根据模态个体(例如,年龄 = 31,女性 = 1,教育 = 2,收入 = 2,和党派 = 1) 随着时间的推移。因此,图表看起来像:x 轴 = 年; y 轴 = 根据模态个体对 lrparty 的预测值。
总而言之,这些是我正在尝试做的阶段: 1. 估计模型: 政党 (lrparty) 位置的有序对数,对受访者的性别、年龄、教育、收入和党派关系进行回归。
进行后抽。
预测模态受访者的派对安排(例如,500 抽奖)
然后,我希望得到一个如下所示的数据集: 年份、调查、国家/地区、党(cmp 代码)、%missing placements、x1:x500(来自抽奖)
我将从该数据集中生成我的图表。例如,对于英国,根据 CSES 调查。
为了弄清楚代码,我开始只使用一项调查 (cses)、一个国家 (uk) 和一个政党 (conservatives),如下面的代码所示。但是我不知道如何从我在代码中的位置到达我想要的情节(如上所述)。
library(rstan)
library(tidyverse)
library(brms)
library(ggplot2)
library(ggthemes)
library(ggmcmc)
## Data:
load("pbrands.RData")
## Keeping only country = uk; survey = cses; party = conservatives
uk_cses_con = pbrands %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs) %>%
filter(survey == "cses") %>%
filter(country == "uk") %>%
filter(cmp == 51620)
## Conducting a Bayesian ordered logit model
fit <- brm(lrparty ~ age + income + education + female + partisan,
data = uk_cses_con, family = "cumulative", chains = 4, iter = 1000)
## Trace and Density Plots for MCMC Samples
plot(fit)
## Posterior Predictive Checks
pp_check(fit)
## Getting variables' modes:
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
getmode(uk_cses_con$age)
getmode(uk_cses_con$female)
getmode(uk_cses_con$education)
getmode(uk_cses_con$income)
getmode(uk_cses_con$partisan)
## Creating the data frame for the modal individual
newavg <- data.frame(age = 31, female = 1, education = 2, income = 2,
partisan = 0, years = uk_cses_con$year)
## predict response for new data
pred <- predict(fit, newdata = newavg)
# extract posterior samples of population-level effects
samples1 <- posterior_samples(fit)
## Display marginal effects of predictors
marginal <- marginal_effects(fit)
## Plot predicted lrparty (my dependent variable) over time (with error:
confidence interval) based on the modal respondent (age = 31, female = 1,
education = 0, income = 0, partisan = 0)
##?
提前致谢!
好的。经过几次尝试和错误尝试,我想出了代码。由于其他人可能会感兴趣,因此我在下面发布代码。
## Packages
install.packages(c("bmrs", "coda", "mvtnorm", "devtools"))
library(devtools)
library(tidyverse)
library(brms)
## Loading the data
load('~/Data/mydata.RData')
## Keeping the variables of our interest
mydata = mydata %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs)
## Function for calculating modes
getmode <- function(v) {
uniqv = unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
## Finding Modal respondents by country, survey, and party:
## Modes by country
mode_by_country = mydata %>%
group_by(country) %>%
mutate(modal_age = getmode(na.omit(age))) %>%
mutate(modal_inc = getmode(na.omit(income))) %>%
mutate(modal_female = getmode(na.omit(female))) %>%
mutate(modal_edu = getmode(na.omit(education))) %>%
mutate(modal_partisan = getmode(na.omit(partisan))) %>%
filter(!duplicated(country))
mode_by_country = mode_by_country[ , c(9, 14:18)]
mode_by_country = mode_by_country[-40, ]
## Build object to receive the information we want to store
runner <- c()
pred = matrix(NA, 2000, 11)
yhat = matrix(NA, 2000, 1)
###### Conducting the model for UK with two parties #########
uk = mydata %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs) %>%
filter(survey == "cses") %>%
filter(country == "uk") %>%
filter(cmp == 51320 | cmp == 51620)
## Finding how many regressions will be conducted
reglength <- length(unique(uk$survey)) * length(unique(uk$year)) * length(unique(uk$cmp))
## Creating our modal British individual based on mode_by_country
mode_by_country[mode_by_country$country == "uk", c(2:6)]
newavg <- data.frame(age = 35, income = 2, female = 1, education = 2, partisan = 0)
## Loop to conduct the ordered logit in rstan, using iter=1000, and chains=4
for(p in na.omit(unique(uk$cmp))){
hold <- uk[uk$cmp == p, ]
country <- hold$country[1]
for(s in na.omit(unique(hold$survey))){
hold1 <- hold[hold$survey == s, ]
for(y in na.omit(unique(hold1$year))){
mod <- brm(lrparty ~ age + female + education + income + partisan, data = hold1[hold1$year == y, ], family = "cumulative", chains = 4, iter = 1000)
for(i in 1:2000) {
pred[i,] <- predict(mod, newdata = newavg, probs = c(0.025, 0.975), summary=TRUE)
yhat[i] <- sum(pred[i, ] * c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))
}
newData <- data.frame(country, p, s, y, pred, yhat)
newData$m <- mean(newData$yhat)
newData$sd <- sd(newData$yhat)
newData$lower <- newData$m - 1.96*newData$sd
newData$upper <- newData$m + 1.96*newData$sd
runner <- rbind(runner, newData)
}
}
}
## Keeping unique values within dataset
uniqdata = runner %>%
filter(!duplicated(m))
## Creating the Figure
uniqdata2 <- uniqdata[, c(1:4, 17:20)]
uniqdata3 <- uniqdata2 %>%
gather(variable, value, -(y:p)) %>%
unite(temp, p, variable) %>%
spread(temp, value)
uniqdata3 = uniqdata3[ , -c(3,6,8,11)]
names(uniqdata3)[3:8] = c("lower_lab", "m_lab", "upper_lab", "lower_con", "m_con", "upper_con")
uniqdata3[3:8] = as.numeric(unlist(uniqdata3[3:8]))
## Plot: Predicted Party Ideological Placement for Modal British Respondent
ggplot(uniqdata3, aes(x = (y))) + geom_line(aes(y = m_lab, colour = "Labor")) + geom_ribbon(aes(ymin = lower_lab,ymax = upper_lab,
linetype=NA), alpha = .25) +
geom_line(aes(y = m_con, color = "Conservatives")) +
geom_ribbon(aes(ymin = lower_con,
ymax = upper_con,
linetype=NA), alpha = .25) +
theme_bw() +
theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5)) + labs(title = "Predicted Party Ideological Placement for Modal British Respondent \n Survey = CSES") + theme(plot.title = element_text(color="black", size=20, face="bold.italic"), axis.title.x = element_text(color="black", size=15, face="italic"), axis.title.y = element_text(color="black", size=15, face="italic")) +
theme(legend.title = element_blank()) +
theme(axis.text.x = element_text(color="black", size= 12.5), axis.text.y = element_text(color="black", size=12.5)) + theme(legend.text = element_text(size=15)) + scale_x_continuous(name="Year", breaks=seq(1997, 2005, 2)) + scale_y_continuous(name="Left-Right Party Position", limits=c(0, 10))