用 R 中的置信区间按子组计算年龄标准化率
Calculate age standardised rates by sub-group with confidence intervals in R
我有一个如下所示的数据框:
df <- data.frame (
time = rep(c("2010", "2011", "2012", "2013", "2014"),4),
age = rep(c("40-44", "45-49", "50-54", "55-59", "60-64"),4),
weight = rep(c(0.38, 0.23, 0.19, 0.12, 0.08),4),
ethnic = rep(c(rep("M",5),rep("NM",5)),2),
gender = c(rep("M",10), rep("F",10)),
pop = round((runif(10, min = 10000, max = 99999)), digits = 0),
count = round((runif(10, min = 100, max = 999)), digits = 0)
)
df$rate = df$count / df$pop
我想计算直接年龄标准化发病率,其中发病率 = count/pop),以及这些的置信区间;对于每个子组。因此,对于时间、性别、种族、年龄的每种组合,我都会有一个标准化的费率。有没有办法在 R 中做到这一点?
我尝试使用 R 包 {epitools} 中的函数 ageadjust.direct
,如下所示:
age_adjust_test <- ageadjust.direct(count = df$count, pop = df$pop,
rate = df$rate, stdpop = df$weight)
由此输出的是整体调整后的利率、置信区间和粗利率。有没有办法让每个子组都得到这个输出?
只需使用 by
按一个或多个因素对数据帧进行子集化,然后将子集传递给您的函数。在这里,by
将 return 使用函数值的数据帧列表,如 docs page 所示。在 by
之外,您可以使用 do.call(rbind,...)
.
将所有 dfs 绑定到一个最终数据帧中
age_adjust_test_list <- by(df, df[,c("time", "gender", "ethnicity", "age")], function(sub) {
tmp <- ageadjust.direct(count = sub$count, pop = sub$pop,
rate = sub$rate, stdpop = sub$weight)
data.frame(time = max(sub$time),
gender = max(sub$gender),
ethnicity = max(sub$ethnicity),
age = max(sub$age),
crude_rate = tmp[[1]],
adj_rate = tmp[[2]],
lower_CI = tmp[[3]],
upper_CI = tmp[[4]])
})
final_df <- do.call(rbind, age_adjust_test_list)
NULL 将显示数据框中未表示的组合。所以考虑根据需要过滤掉:
age_adjust_test_list <- Filter(function(x) !is.null(x), age_adjust_test_list)
我们可以将 summarise
分组到 list
,然后 unnest
将 list
组件分组到单独的列中
library(tidyverse)
df %>%
group_by(time,age, ethnic, gender) %>%
summarise(age_adjust = list(ageadjust.direct(count = count,
pop = pop, rate = rate, stdpop = weight))) %>%
mutate(age_adjust = map(age_adjust, as.data.frame.list)) %>%
unnest
# A tibble: 20 x 8
# Groups: time, age, ethnic [10]
# time age ethnic gender crude.rate adj.rate lci uci
# <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
# 1 2010 40-44 M F 0.00763 0.00763 0.00709 0.00820
# 2 2010 40-44 M M 0.00763 0.00763 0.00709 0.00820
# 3 2010 40-44 NM F 0.0281 0.0281 0.0257 0.0306
# 4 2010 40-44 NM M 0.0281 0.0281 0.0257 0.0306
# 5 2011 45-49 M F 0.0145 0.0145 0.0136 0.0155
# 6 2011 45-49 M M 0.0145 0.0145 0.0136 0.0155
# 7 2011 45-49 NM F 0.0425 0.0425 0.0399 0.0453
# 8 2011 45-49 NM M 0.0425 0.0425 0.0399 0.0453
# 9 2012 50-54 M F 0.0116 0.0116 0.0109 0.0124
#10 2012 50-54 M M 0.0116 0.0116 0.0109 0.0124
#11 2012 50-54 NM F 0.00708 0.00708 0.00607 0.00821
#12 2012 50-54 NM M 0.00708 0.00708 0.00607 0.00821
#13 2013 55-59 M F 0.0251 0.0251 0.0232 0.0271
#14 2013 55-59 M M 0.0251 0.0251 0.0232 0.0271
#15 2013 55-59 NM F 0.00733 0.00733 0.00678 0.00792
#16 2013 55-59 NM M 0.00733 0.00733 0.00678 0.00792
#17 2014 60-64 M F 0.0101 0.0101 0.00944 0.0109
#18 2014 60-64 M M 0.0101 0.0101 0.00944 0.0109
#19 2014 60-64 NM F 0.00916 0.00916 0.00852 0.00984
#20 2014 60-64 NM M 0.00916 0.00916 0.00852 0.00984
这里有一个方便的data.table方法,一行就够了。
library(data.table)
library(epitools)
# convert df to data.table
setDT(df)
# define subgroups
group_by<-c('time','age', 'ethnic', 'gender')
# ageadjust.direct by subgroups. The trick is to include as.list()
df[, as.list(ageadjust.direct(count = count, pop = pop, rate = rate, stdpop = weight)), by=group_by]
我有一个如下所示的数据框:
df <- data.frame (
time = rep(c("2010", "2011", "2012", "2013", "2014"),4),
age = rep(c("40-44", "45-49", "50-54", "55-59", "60-64"),4),
weight = rep(c(0.38, 0.23, 0.19, 0.12, 0.08),4),
ethnic = rep(c(rep("M",5),rep("NM",5)),2),
gender = c(rep("M",10), rep("F",10)),
pop = round((runif(10, min = 10000, max = 99999)), digits = 0),
count = round((runif(10, min = 100, max = 999)), digits = 0)
)
df$rate = df$count / df$pop
我想计算直接年龄标准化发病率,其中发病率 = count/pop),以及这些的置信区间;对于每个子组。因此,对于时间、性别、种族、年龄的每种组合,我都会有一个标准化的费率。有没有办法在 R 中做到这一点?
我尝试使用 R 包 {epitools} 中的函数 ageadjust.direct
,如下所示:
age_adjust_test <- ageadjust.direct(count = df$count, pop = df$pop,
rate = df$rate, stdpop = df$weight)
由此输出的是整体调整后的利率、置信区间和粗利率。有没有办法让每个子组都得到这个输出?
只需使用 by
按一个或多个因素对数据帧进行子集化,然后将子集传递给您的函数。在这里,by
将 return 使用函数值的数据帧列表,如 docs page 所示。在 by
之外,您可以使用 do.call(rbind,...)
.
age_adjust_test_list <- by(df, df[,c("time", "gender", "ethnicity", "age")], function(sub) {
tmp <- ageadjust.direct(count = sub$count, pop = sub$pop,
rate = sub$rate, stdpop = sub$weight)
data.frame(time = max(sub$time),
gender = max(sub$gender),
ethnicity = max(sub$ethnicity),
age = max(sub$age),
crude_rate = tmp[[1]],
adj_rate = tmp[[2]],
lower_CI = tmp[[3]],
upper_CI = tmp[[4]])
})
final_df <- do.call(rbind, age_adjust_test_list)
NULL 将显示数据框中未表示的组合。所以考虑根据需要过滤掉:
age_adjust_test_list <- Filter(function(x) !is.null(x), age_adjust_test_list)
我们可以将 summarise
分组到 list
,然后 unnest
将 list
组件分组到单独的列中
library(tidyverse)
df %>%
group_by(time,age, ethnic, gender) %>%
summarise(age_adjust = list(ageadjust.direct(count = count,
pop = pop, rate = rate, stdpop = weight))) %>%
mutate(age_adjust = map(age_adjust, as.data.frame.list)) %>%
unnest
# A tibble: 20 x 8
# Groups: time, age, ethnic [10]
# time age ethnic gender crude.rate adj.rate lci uci
# <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
# 1 2010 40-44 M F 0.00763 0.00763 0.00709 0.00820
# 2 2010 40-44 M M 0.00763 0.00763 0.00709 0.00820
# 3 2010 40-44 NM F 0.0281 0.0281 0.0257 0.0306
# 4 2010 40-44 NM M 0.0281 0.0281 0.0257 0.0306
# 5 2011 45-49 M F 0.0145 0.0145 0.0136 0.0155
# 6 2011 45-49 M M 0.0145 0.0145 0.0136 0.0155
# 7 2011 45-49 NM F 0.0425 0.0425 0.0399 0.0453
# 8 2011 45-49 NM M 0.0425 0.0425 0.0399 0.0453
# 9 2012 50-54 M F 0.0116 0.0116 0.0109 0.0124
#10 2012 50-54 M M 0.0116 0.0116 0.0109 0.0124
#11 2012 50-54 NM F 0.00708 0.00708 0.00607 0.00821
#12 2012 50-54 NM M 0.00708 0.00708 0.00607 0.00821
#13 2013 55-59 M F 0.0251 0.0251 0.0232 0.0271
#14 2013 55-59 M M 0.0251 0.0251 0.0232 0.0271
#15 2013 55-59 NM F 0.00733 0.00733 0.00678 0.00792
#16 2013 55-59 NM M 0.00733 0.00733 0.00678 0.00792
#17 2014 60-64 M F 0.0101 0.0101 0.00944 0.0109
#18 2014 60-64 M M 0.0101 0.0101 0.00944 0.0109
#19 2014 60-64 NM F 0.00916 0.00916 0.00852 0.00984
#20 2014 60-64 NM M 0.00916 0.00916 0.00852 0.00984
这里有一个方便的data.table方法,一行就够了。
library(data.table)
library(epitools)
# convert df to data.table
setDT(df)
# define subgroups
group_by<-c('time','age', 'ethnic', 'gender')
# ageadjust.direct by subgroups. The trick is to include as.list()
df[, as.list(ageadjust.direct(count = count, pop = pop, rate = rate, stdpop = weight)), by=group_by]