编写一个程序来引入排列
writing a program to introduce permutations
基本上我想编写一个程序,将我的数据顺序随机化 n
次,每次都完成生存分析,然后把这 n
次的输出绘制出来
因此,让我们从 Matching
包中获取以下通用数据(lalonde),并创建一个包含接受治疗和未接受治疗的人的数据集。 Link to package
# Build the matched sample from the `lalonde` example data: GenMatch() finds
# covariate weights, Match() then pairs treated and control units with them.
set.seed(123)
library(Matching)
data(lalonde)
lalonde$age_cat <- with(lalonde, ifelse(age < 24, 1, 2))
# Row identifier; used further down to keep one row per treated unit.
# seq_len(nrow()) instead of 1:length() so an empty data set yields no IDs.
lalonde$ID <- seq_len(nrow(lalonde))
# The covariates we want to match on.
# with() replaces attach()/detach(): columns are looked up inside `lalonde`
# only, and nothing stays on the search path if a later step fails.
X <- with(
  lalonde,
  cbind(age_cat, educ, black, hisp, married, nodegr, u74, u75, re75, re74)
)
# The covariates we want to obtain balance on
BalanceMat <- with(
  lalonde,
  cbind(
    age_cat, educ, black, hisp, married, nodegr, u74, u75, re75, re74,
    I(re74 * re75)
  )
)
genout <- GenMatch(
  Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat, estimand = "ATE",
  M = 1, pop.size = 16, max.generations = 10, wait.generations = 1
)
# Now pair the non-treated observations to the treated ones,
# BUT pair WITHOUT REPLACEMENT (replace = FALSE).
mout <- Match(
  Y = NULL, Tr = lalonde$treat, X = X,
  Weight.matrix = genout, M = 2,
  replace = FALSE, ties = TRUE
)
summary(mout)
# The summary was reported as 130 treated observations matched to 260
# non-treated ones; this is because M = 2 (two controls per treated unit).
# length(lalonde$age[lalonde$treat == 0]) is also 260, i.e. every control is
# used; this was done for a specific reason.
# Now create a table for the 130 + 260 matched observations.
treated <- lalonde[mout$index.treated, ]
# Each treated unit appears once per matched control, so keep only its first
# occurrence.
library(dplyr)
treat_clean <- treated %>%
  group_by(ID) %>%
  slice(1) %>%
  ungroup() # drop the grouping so it is not carried into the combined data
non.treated <- lalonde[mout$index.control, ]
# Finally combine both parts into one data set.
matched.data <- rbind(treat_clean, non.treated)
我们现在可以进行条件逻辑回归来确定与 re78(1978 年的收入)和治疗相关的 OR(比值比)。为此,我们需要 survival 包。 Link to package
library(survival)
如果居住者在 1978 年的收入超过 8125,则假设成功
# Flag success (1) when re78 exceeds 8125, otherwise 0, then regress the flag
# on treatment with clogit() using the Efron method.
# NOTE(review): the formula has no strata() term identifying the matched
# sets - confirm this is intended.
matched.data$success <- as.numeric(matched.data$re78 > 8125)
output <- clogit(success ~ treat, matched.data, method = "efron")
summary(output)
所以我们看到治疗 (treat=1) 的 OR 是 1.495
我们可以将其保存为:
# Odds ratio for the first model term: exponentiate its fitted coefficient.
iteration.1 <- exp(output[["coefficients"]][1])
现在我们在 Matching 包的文档 (link) 中读到关于 replace = FALSE 的说明:
注意,如果为 FALSE,
匹配的顺序通常很重要。匹配将按照
数据排序的顺序进行
所以我想做的是创建一个函数,该函数将 n
次
- 随机打乱 lalonde$ID 的顺序
- 运行匹配过程
- 运行 clogit 算法
- 每次保存输出
exp(output$coefficients[1])
- 为每个 n
绘制 OR (exp(output$coefficients[1])
)
本质上,我想在分析中引入排列。
例如当 n=5
时,如何做到这一点?
可以使用 sample
引入排列:
# Permutation skeleton: reshuffle the row order of `lalonde` n times and
# collect one odds ratio per shuffle.
data(lalonde)
lalonde$age_cat <- with(lalonde, ifelse(age < 24, 1, 2))
lalonde$ID <- seq_len(nrow(lalonde))
n <- 5
# Preallocated numeric result vector; entries stay NA until filled.
res <- rep(NA_real_, n)
# seq_len(n) runs zero times for n == 0, where 1:n would iterate over c(1, 0).
for (i in seq_len(n)) {
  # Randomise the row order (each shuffle starts from the previous one).
  lalonde <- lalonde[sample(nrow(lalonde)), ]
  ## rest of code goes here: rebuild X from the reshuffled `lalonde`, then
  ## re-run Match() and clogit() so that `output` is refreshed on every pass.
  ## Without that step `output` is whatever was fitted before the loop and
  ## every res[i] holds the same value.
  res[i] <- exp(output$coefficients[1])
}
plot(
  seq_len(n), res,
  xlab = "Permutation", ylab = "Odds ratio", main = "Odds Ratios"
)
对于这类事情,我非常喜欢使用 replicate
函数:
# Template for running the analysis many times with replicate(). The `...`
# placeholders and the "rest of your computation" section must be filled in
# before this can run.
X <- cbind(...) # placeholder: the matching covariates built earlier
BalanceMat <- cbind(...) # placeholder: the balance covariates built earlier
lalonde$ID <- seq.int(nrow(lalonde))
# Evaluate the braced expression 1000 times; the value of its last expression
# is what each replication contributes to `results`.
results <- replicate(1000, {
## Option 1 (active): assign a random permutation to the ID column. This
## relabels the rows but leaves their order untouched.
## Not certain if it's just $ID order that matters.
lalonde$ID <- sample(nrow(lalonde))
## Option 2 (commented out): shuffle the rows of the whole data frame.
## lalonde <- lalonde[ sample(nrow(lalonde)), ]
## ...
## rest of your computation (must assign a freshly fitted model to `output`)
## ...
#### optionally return everything
## output
#### return just the minimum: the odds ratio of the first coefficient
exp(output$coefficients[1])
})
#### if you returned output earlier, you'll need this, otherwise not
## NOTE(review): returning the full `output` likely also needs
## replicate(..., simplify = FALSE) so `results` is a list of fits - confirm.
## coef <- exp(sapply(results, function(z) z$coefficients[1]))
## plot as needed
我不确定你指的是 ID
的顺序重要,还是整个数据集的行顺序重要;请相应地调整 replicate
循环的前几行。
基本上我想编写一个程序,将我的数据顺序随机化 n
次,每次都完成生存分析,然后把这 n 次的输出绘制出来
因此,让我们从 Matching
包中获取以下通用数据(lalonde),并创建一个包含接受治疗和未接受治疗的人的数据集。 Link to package
# Build the matched sample from the `lalonde` example data: GenMatch() finds
# covariate weights, Match() then pairs treated and control units with them.
set.seed(123)
library(Matching)
data(lalonde)
lalonde$age_cat <- with(lalonde, ifelse(age < 24, 1, 2))
# Row identifier; used further down to keep one row per treated unit.
# seq_len(nrow()) instead of 1:length() so an empty data set yields no IDs.
lalonde$ID <- seq_len(nrow(lalonde))
# The covariates we want to match on.
# with() replaces attach()/detach(): columns are looked up inside `lalonde`
# only, and nothing stays on the search path if a later step fails.
X <- with(
  lalonde,
  cbind(age_cat, educ, black, hisp, married, nodegr, u74, u75, re75, re74)
)
# The covariates we want to obtain balance on
BalanceMat <- with(
  lalonde,
  cbind(
    age_cat, educ, black, hisp, married, nodegr, u74, u75, re75, re74,
    I(re74 * re75)
  )
)
genout <- GenMatch(
  Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat, estimand = "ATE",
  M = 1, pop.size = 16, max.generations = 10, wait.generations = 1
)
# Now pair the non-treated observations to the treated ones,
# BUT pair WITHOUT REPLACEMENT (replace = FALSE).
mout <- Match(
  Y = NULL, Tr = lalonde$treat, X = X,
  Weight.matrix = genout, M = 2,
  replace = FALSE, ties = TRUE
)
summary(mout)
# The summary was reported as 130 treated observations matched to 260
# non-treated ones; this is because M = 2 (two controls per treated unit).
# length(lalonde$age[lalonde$treat == 0]) is also 260, i.e. every control is
# used; this was done for a specific reason.
# Now create a table for the 130 + 260 matched observations.
treated <- lalonde[mout$index.treated, ]
# Each treated unit appears once per matched control, so keep only its first
# occurrence.
library(dplyr)
treat_clean <- treated %>%
  group_by(ID) %>%
  slice(1) %>%
  ungroup() # drop the grouping so it is not carried into the combined data
non.treated <- lalonde[mout$index.control, ]
# Finally combine both parts into one data set.
matched.data <- rbind(treat_clean, non.treated)
我们现在可以进行条件逻辑回归来确定与 re78(1978 年的收入)和治疗相关的 OR(比值比)。为此,我们需要 survival 包。 Link to package
library(survival)
如果居住者在 1978 年的收入超过 8125,则假设成功
# Flag success (1) when re78 exceeds 8125, otherwise 0, then regress the flag
# on treatment with clogit() using the Efron method.
# NOTE(review): the formula has no strata() term identifying the matched
# sets - confirm this is intended.
matched.data$success <- as.numeric(matched.data$re78 > 8125)
output <- clogit(success ~ treat, matched.data, method = "efron")
summary(output)
所以我们看到治疗 (treat=1) 的 OR 是 1.495
我们可以将其保存为:
# Odds ratio for the first model term: exponentiate its fitted coefficient.
iteration.1 <- exp(output[["coefficients"]][1])
现在我们在 Matching 包的文档 (link) 中读到关于 replace = FALSE 的说明:
注意,如果为 FALSE,
匹配的顺序通常很重要。匹配将按照
数据排序的顺序进行
所以我想做的是创建一个函数,该函数将 n
次
- 随机打乱 lalonde$ID 的顺序
- 运行匹配过程
- 运行 clogit 算法
- 每次保存输出
exp(output$coefficients[1])
- 为每个 n 绘制 OR (
exp(output$coefficients[1])
)
本质上,我想在分析中引入排列。 例如当 n=5
时,如何做到这一点?可以使用 sample
引入排列:
# Permutation skeleton: reshuffle the row order of `lalonde` n times and
# collect one odds ratio per shuffle.
data(lalonde)
lalonde$age_cat <- with(lalonde, ifelse(age < 24, 1, 2))
lalonde$ID <- seq_len(nrow(lalonde))
n <- 5
# Preallocated numeric result vector; entries stay NA until filled.
res <- rep(NA_real_, n)
# seq_len(n) runs zero times for n == 0, where 1:n would iterate over c(1, 0).
for (i in seq_len(n)) {
  # Randomise the row order (each shuffle starts from the previous one).
  lalonde <- lalonde[sample(nrow(lalonde)), ]
  ## rest of code goes here: rebuild X from the reshuffled `lalonde`, then
  ## re-run Match() and clogit() so that `output` is refreshed on every pass.
  ## Without that step `output` is whatever was fitted before the loop and
  ## every res[i] holds the same value.
  res[i] <- exp(output$coefficients[1])
}
plot(
  seq_len(n), res,
  xlab = "Permutation", ylab = "Odds ratio", main = "Odds Ratios"
)
对于这类事情,我非常喜欢使用 replicate
函数:
# Template for running the analysis many times with replicate(). The `...`
# placeholders and the "rest of your computation" section must be filled in
# before this can run.
X <- cbind(...) # placeholder: the matching covariates built earlier
BalanceMat <- cbind(...) # placeholder: the balance covariates built earlier
lalonde$ID <- seq.int(nrow(lalonde))
# Evaluate the braced expression 1000 times; the value of its last expression
# is what each replication contributes to `results`.
results <- replicate(1000, {
## Option 1 (active): assign a random permutation to the ID column. This
## relabels the rows but leaves their order untouched.
## Not certain if it's just $ID order that matters.
lalonde$ID <- sample(nrow(lalonde))
## Option 2 (commented out): shuffle the rows of the whole data frame.
## lalonde <- lalonde[ sample(nrow(lalonde)), ]
## ...
## rest of your computation (must assign a freshly fitted model to `output`)
## ...
#### optionally return everything
## output
#### return just the minimum: the odds ratio of the first coefficient
exp(output$coefficients[1])
})
#### if you returned output earlier, you'll need this, otherwise not
## NOTE(review): returning the full `output` likely also needs
## replicate(..., simplify = FALSE) so `results` is a list of fits - confirm.
## coef <- exp(sapply(results, function(z) z$coefficients[1]))
## plot as needed
我不确定你指的是 ID
的顺序重要,还是整个数据集的行顺序重要;请相应地调整 replicate
循环的前几行。