找出加权比例的标准误差(调查数据)
Find the standard error of weighted proportions (survey data)
我创建了一个用户函数来为我的分类调查数据生成交叉表。例如,我编写了一个按性别查找 Q50_1 的函数。看起来不错。
但是现在,我想再添加一列,给出每一行对应的标准误差(SE),但我想不出解决办法。这是我想要的输出(本例中的 SE 值是虚构的)。
这是我的数据:
# Example data, parsed from an inline whitespace-delimited table with the
# columns Q50_1, Q50_2, Q38, Q90, pov, gender and wgt.
# readr::read_table() replaces the deprecated read_table2(); the explicit
# namespace lets this statement run before any library() call.
data_in <- readr::read_table("Q50_1 Q50_2 Q38 Q90 pov gender wgt
never always Yes 2 High M 1.3
sometimes always No 2 Med F 0.4
always sometimes Yes 4 Low F 1.2
never never No 2 High M 0.5
always always No 4 High M 0.7
sometimes never Yes 3 Low F 0.56
sometimes never Yes 2 Med F 0.9
")
这是我创建的用于生成交叉表的函数:
library(tidyverse)
library(pollster)
# Run one weighted cross-tab and reshape it to long form (feeds into the
# multi-question wrapper defined after it).
#
# Args:
#   data:   data frame holding the responses.
#   col:    name of the question column, given as a string.
#   target: grouping column, unquoted.
#   weight: weight column, unquoted.
#
# Returns the long-format pollster::crosstab() result with the question
# column pivoted into `name` (the question) and `value` (the response).
xtab_func <- function(data, col, target, weight) {
  question <- sym(col)
  target <- enquo(target)
  weight <- enquo(weight)

  pollster::crosstab(
    df = data, !!target, !!question, !!weight, format = "long"
  ) %>%
    # Pivot exactly the requested question column rather than everything
    # starting with "Q": a grouping column such as Q38 must not be swept
    # into the pivot, and questions with other prefixes must still work.
    pivot_longer(cols = all_of(col))
}
# Stack the long-format cross-tabs of several questions against one grouping
# variable.
#
# Args:
#   data:        data frame holding the responses.
#   target:      grouping column, unquoted.
#   columns:     character vector of question column names.
#   weight:      weight column, unquoted.
#   target_name: value stored in the `group_level` column (e.g. "gender").
#
# Returns the row-bound xtab_func() results with the grouping column renamed
# to `group`, `name` renamed to `question`, and `group_level` added.
# Raises an error when `columns` is empty.
crosstab <- function(data, target, columns, weight, target_name) {
  target <- enquo(target)
  weight <- enquo(weight)
  target_name <- enquo(target_name)

  # Previously an empty `columns` skipped the loop entirely and the function
  # returned whatever `df` resolved to outside it (normally stats::df).
  if (length(columns) == 0) {
    stop("`columns` must name at least one question column.", call. = FALSE)
  }

  # unique() keeps the earlier behaviour of storing one table per distinct
  # column name.
  tables <- lapply(unique(columns), function(question) {
    xtab_func(data, question, !!target, !!weight)
  })

  # Bind and relabel once, after all tables exist, instead of redoing the
  # whole pipeline on every loop iteration.
  dplyr::bind_rows(tables) %>%
    rename(group = !!target, question = name) %>%
    mutate(group_level = !!target_name)
}
# Cross-tabulate Q50_1 and Q50_2 by gender, weighted by wgt.
crosstab(
  data = data_in,
  target = gender,
  columns = c("Q50_1", "Q50_2"),
  weight = wgt,
  target_name = "gender"
)
这篇帖子展示了其背后的数学原理,但我找不到任何关于如何在实际中应用它的内容。感谢您的帮助!
编辑
我可以使用 pollster::moe_crosstab 来计算误差范围(margin of error),但 pollster 似乎没有提供直接添加标准误差的方法。
我们可以修改函数:先用 moe_crosstab 计算误差范围,再除以置信水平对应的 z 值(95% 置信水平下为 1.96),从而得到标准误差。
library(dplyr)
library(purrr)
library(tidyr)
library(pollster)
# Build the cross-tab with standard errors for each question in `columns`
# and stack the results into one data frame.
#
# Args:
#   data:        data frame holding the responses.
#   target:      grouping column, unquoted.
#   columns:     character vector of question column names.
#   weight:      weight column, unquoted.
#   target_name: accepted so existing calls keep working; not used here.
#   zval:        z value passed on to xtab_func() to turn a margin of error
#                into a standard error (e.g. 1.96).
#
# Returns the row-bound xtab_func() results, one set of rows per question.
crosstab <- function(data, target, columns, weight, target_name, zval) {
  target <- enquo(target)
  weight <- enquo(weight)

  tables <- lapply(columns, function(question) {
    xtab_func(data, question, !!target, !!weight, zval)
  })
  dplyr::bind_rows(tables)
}
# Weighted cross-tab of one question by a grouping variable, with a standard
# error column derived from pollster's margin of error.
#
# Args:
#   data:   data frame holding the responses.
#   col:    name of the question column, given as a string.
#   target: grouping column, unquoted.
#   weight: weight column, unquoted.
#   zval:   z value of the confidence level (e.g. 1.96).
#
# Returns the long-format pollster::crosstab() result plus `SE`, with the
# question column pivoted into `name` and `value`. `SE` is in the same units
# as moe_crosstab()'s `moe` column.
xtab_func <- function(data, col, target, weight, zval) {
  question <- sym(col)
  target <- enquo(target)
  weight <- enquo(weight)

  # The margin of error must use the same x/y orientation as the percentages
  # below; with the arguments swapped, the positional bind_cols() pairs each
  # row with the SE of a different cell and a different proportion.
  # Passing zscore = zval makes moe / zval a standard error for any zval,
  # not only for the default 1.96.
  se_tbl <- pollster::moe_crosstab(
    df = data, !!target, !!question, !!weight,
    format = "long", zscore = zval
  ) %>%
    transmute(SE = moe / zval)

  pollster::crosstab(
    df = data, !!target, !!question, !!weight, format = "long"
  ) %>%
    bind_cols(se_tbl) %>%
    # Pivot exactly the requested question column instead of every column
    # whose name starts with "Q".
    pivot_longer(cols = all_of(col))
}
-测试
# Cross-tabulate Q50_1 and Q50_2 by gender with standard errors, using the
# z value 1.96.
crosstab(
  data = data_in,
  target = gender,
  columns = c("Q50_1", "Q50_2"),
  weight = wgt,
  target_name = "gender",
  zval = 1.96
)
我创建了一个用户函数来为我的分类调查数据生成交叉表。例如,我编写了一个按性别查找 Q50_1 的函数。看起来不错。
但是现在,我想再添加一列,给出每一行对应的标准误差(SE),但我想不出解决办法。这是我想要的输出(本例中的 SE 值是虚构的)。
这是我的数据:
# Example data, parsed from an inline whitespace-delimited table with the
# columns Q50_1, Q50_2, Q38, Q90, pov, gender and wgt.
# readr::read_table() replaces the deprecated read_table2(); the explicit
# namespace lets this statement run before any library() call.
data_in <- readr::read_table("Q50_1 Q50_2 Q38 Q90 pov gender wgt
never always Yes 2 High M 1.3
sometimes always No 2 Med F 0.4
always sometimes Yes 4 Low F 1.2
never never No 2 High M 0.5
always always No 4 High M 0.7
sometimes never Yes 3 Low F 0.56
sometimes never Yes 2 Med F 0.9
")
这是我创建的用于生成交叉表的函数:
library(tidyverse)
library(pollster)
# Run one weighted cross-tab and reshape it to long form (feeds into the
# multi-question wrapper defined after it).
#
# Args:
#   data:   data frame holding the responses.
#   col:    name of the question column, given as a string.
#   target: grouping column, unquoted.
#   weight: weight column, unquoted.
#
# Returns the long-format pollster::crosstab() result with the question
# column pivoted into `name` (the question) and `value` (the response).
xtab_func <- function(data, col, target, weight) {
  question <- sym(col)
  target <- enquo(target)
  weight <- enquo(weight)

  pollster::crosstab(
    df = data, !!target, !!question, !!weight, format = "long"
  ) %>%
    # Pivot exactly the requested question column rather than everything
    # starting with "Q": a grouping column such as Q38 must not be swept
    # into the pivot, and questions with other prefixes must still work.
    pivot_longer(cols = all_of(col))
}
# Stack the long-format cross-tabs of several questions against one grouping
# variable.
#
# Args:
#   data:        data frame holding the responses.
#   target:      grouping column, unquoted.
#   columns:     character vector of question column names.
#   weight:      weight column, unquoted.
#   target_name: value stored in the `group_level` column (e.g. "gender").
#
# Returns the row-bound xtab_func() results with the grouping column renamed
# to `group`, `name` renamed to `question`, and `group_level` added.
# Raises an error when `columns` is empty.
crosstab <- function(data, target, columns, weight, target_name) {
  target <- enquo(target)
  weight <- enquo(weight)
  target_name <- enquo(target_name)

  # Previously an empty `columns` skipped the loop entirely and the function
  # returned whatever `df` resolved to outside it (normally stats::df).
  if (length(columns) == 0) {
    stop("`columns` must name at least one question column.", call. = FALSE)
  }

  # unique() keeps the earlier behaviour of storing one table per distinct
  # column name.
  tables <- lapply(unique(columns), function(question) {
    xtab_func(data, question, !!target, !!weight)
  })

  # Bind and relabel once, after all tables exist, instead of redoing the
  # whole pipeline on every loop iteration.
  dplyr::bind_rows(tables) %>%
    rename(group = !!target, question = name) %>%
    mutate(group_level = !!target_name)
}
# Cross-tabulate Q50_1 and Q50_2 by gender, weighted by wgt.
crosstab(
  data = data_in,
  target = gender,
  columns = c("Q50_1", "Q50_2"),
  weight = wgt,
  target_name = "gender"
)
这篇帖子展示了其背后的数学原理,但我找不到任何关于如何在实际中应用它的内容。感谢您的帮助!
编辑
我可以使用 pollster::moe_crosstab 来计算误差范围(margin of error),但 pollster 似乎没有提供直接添加标准误差的方法。
我们可以修改函数:先用 moe_crosstab 计算误差范围,再除以置信水平对应的 z 值(95% 置信水平下为 1.96),从而得到标准误差。
library(dplyr)
library(purrr)
library(tidyr)
library(pollster)
# Build the cross-tab with standard errors for each question in `columns`
# and stack the results into one data frame.
#
# Args:
#   data:        data frame holding the responses.
#   target:      grouping column, unquoted.
#   columns:     character vector of question column names.
#   weight:      weight column, unquoted.
#   target_name: accepted so existing calls keep working; not used here.
#   zval:        z value passed on to xtab_func() to turn a margin of error
#                into a standard error (e.g. 1.96).
#
# Returns the row-bound xtab_func() results, one set of rows per question.
crosstab <- function(data, target, columns, weight, target_name, zval) {
  target <- enquo(target)
  weight <- enquo(weight)

  tables <- lapply(columns, function(question) {
    xtab_func(data, question, !!target, !!weight, zval)
  })
  dplyr::bind_rows(tables)
}
# Weighted cross-tab of one question by a grouping variable, with a standard
# error column derived from pollster's margin of error.
#
# Args:
#   data:   data frame holding the responses.
#   col:    name of the question column, given as a string.
#   target: grouping column, unquoted.
#   weight: weight column, unquoted.
#   zval:   z value of the confidence level (e.g. 1.96).
#
# Returns the long-format pollster::crosstab() result plus `SE`, with the
# question column pivoted into `name` and `value`. `SE` is in the same units
# as moe_crosstab()'s `moe` column.
xtab_func <- function(data, col, target, weight, zval) {
  question <- sym(col)
  target <- enquo(target)
  weight <- enquo(weight)

  # The margin of error must use the same x/y orientation as the percentages
  # below; with the arguments swapped, the positional bind_cols() pairs each
  # row with the SE of a different cell and a different proportion.
  # Passing zscore = zval makes moe / zval a standard error for any zval,
  # not only for the default 1.96.
  se_tbl <- pollster::moe_crosstab(
    df = data, !!target, !!question, !!weight,
    format = "long", zscore = zval
  ) %>%
    transmute(SE = moe / zval)

  pollster::crosstab(
    df = data, !!target, !!question, !!weight, format = "long"
  ) %>%
    bind_cols(se_tbl) %>%
    # Pivot exactly the requested question column instead of every column
    # whose name starts with "Q".
    pivot_longer(cols = all_of(col))
}
-测试
# Cross-tabulate Q50_1 and Q50_2 by gender with standard errors, using the
# z value 1.96.
crosstab(
  data = data_in,
  target = gender,
  columns = c("Q50_1", "Q50_2"),
  weight = wgt,
  target_name = "gender",
  zval = 1.96
)