在函数和 for 循环中取列的平均值
Taking the mean of a column within a function and a for loop
我有以下函数:
# Code a binary `treat` indicator from `treatment_group`, drop the rows that
# belong to neither arm, and optionally standardize the `outcome` column
# against the control arm (treat == 0).
#
# dataset: data frame with a `treatment_group` column ("trt" / "control").
# outcome: name of the column to standardize, given as a string.
# baseline_outcome, covariates: column names; they are only combined into
#   `baseline_covariates`, which the code shown here never reads again.
# standardize: when TRUE, subtract the control-arm mean from `outcome` and
#   divide by the control-arm standard deviation.
compute_treatment_effects <- function(dataset, outcome, baseline_outcome,
covariates,
standardize){
baseline_covariates <- c(baseline_outcome, covariates)
# "trt" -> 1, "control" -> 0, anything else -> NA; NA rows are then removed.
dataset <- dataset %>%
mutate(treat =ifelse(treatment_group == "trt", 1,
ifelse(treatment_group == "control", 0, NA))) %>%
filter(!is.na(treat))
if (standardize){
# NOTE(review): `dataset[, outcome]` collapses to a plain vector only for a
# base data.frame. On a tibble it stays a one-column data frame, so mean()
# and sd() receive a list-like object and fail with the error reported for
# this code. `dataset[[outcome]]` extracts a vector in both cases.
dataset[,outcome] <- (dataset[,outcome] - mean(dataset[dataset$treat==0,outcome], na.rm=TRUE))/
sd(dataset[dataset$treat==0,outcome], na.rm=TRUE)
}
# NOTE(review): there is no trailing `dataset` expression, so the function's
# value is that of the `if` statement, not an explicitly returned data frame.
}
现在的问题是,每当涉及标准化程序时,我都会收到错误消息:
“is.data.frame(x) 中的错误:
'list' 对象无法强制转换为 'double' 类型
另外: 警告信息:
在 mean.default(dataset[dataset$treat == 0, outcome], na.rm = TRUE) 中: 参数既不是数值也不是逻辑值: 返回 NA”
我真的不确定为什么会这样,我不认为语法有任何错误?
这是与代码一起使用的数据框示例:
# Example data: 22 rows; in every column the last ten values are 10.
dataframe <- data.frame(
  var1 = c(1, 2, 5, 1, 642, 5, 1, 2, 5, 9, NA, 8, rep(10, 10)),
  Var2 = c(1, 3, 5, 1, 642, 5, NA, NA, NA, NA, NA, NA, rep(10, 10)),
  var3 = c(1, 2, 635, 9, NA, 1, 2, 5, NA, NA, 12, NA, rep(10, 10)),
  var4 = c(1, 21, 15, 19, NA, 1, 26656, 56, 6, NA, 512, NA, rep(10, 10)),
  cov1 = c(1, 22, 335, 29, NA, NA, NA, 645, NA, NA, 12, NA, rep(10, 10)),
  cov2 = c(44251, 2322, 5, 29, 45, 35, 42, 645, 55, 525, NA, NA, rep(10, 10)),
  cov3 = c(154, 2552, 35, 53529, 5, 3, 53542, 645, 25, 2, 12, 23, rep(10, 10))
)

# Rows with var3 == 2 form the treated arm, var3 == 10 the control arm;
# every other row gets NA.
dataframe <- mutate(
  dataframe,
  treatment_group = ifelse(var3 == 2, "trt", ifelse(var3 == 10, "control", NA))
)

# Arguments used when calling compute_treatment_effects().
dataset <- dataframe
outcome <- "Var2"
baseline_outcome <- "var1"
covariates <- c("cov1", "cov2", "cov3")
非常感谢!!!
OP 的原始数据集可能是 tibble 或 data.table,
因为用 [, column] 取单列时,data.frame 默认 drop = TRUE(返回向量),
而 tibble 和 data.table 默认都是 drop = FALSE(返回的仍是只有一列的数据框),
于是 mean() 得到的是列表而不是数值向量:
> compute_treatment_effects(as_tibble(dataset), outcome, baseline_outcome, covariates, standardize = TRUE)
Error in is.data.frame(x) :
'list' object cannot be coerced to type 'double'
In addition: Warning message:
In mean.default(dataset[dataset$treat == 0, outcome], na.rm = TRUE) :
argument is not numeric or logical: returning NA
一种解决方法是先用 as.data.frame 将数据集转换为 data.frame
compute_treatment_effects(as.data.frame(dataset), outcome, baseline_outcome, covariates, standardize = TRUE)
-输出
var1 Var2 var3 var4 cov1 cov2 cov3 treatment_group treat
1 2 -Inf 2 21 22 2322 2552 trt 1
2 1 NA 2 26656 NA 42 53542 trt 1
3 10 NaN 10 10 10 10 10 control 0
4 10 NaN 10 10 10 10 10 control 0
5 10 NaN 10 10 10 10 10 control 0
6 10 NaN 10 10 10 10 10 control 0
7 10 NaN 10 10 10 10 10 control 0
8 10 NaN 10 10 10 10 10 control 0
9 10 NaN 10 10 10 10 10 control 0
10 10 NaN 10 10 10 10 10 control 0
11 10 NaN 10 10 10 10 10 control 0
12 10 NaN 10 10 10 10 10 control 0
或者修改函数,使用 [[ 而不是 [ 来提取列,即
#' Standardize an outcome column against the control arm
#'
#' Adds a numeric `treat` indicator (1 for "trt", 0 for "control") derived
#' from `treatment_group`, keeps only the rows that belong to one of those two
#' arms and, when requested, rescales `outcome` with the mean and standard
#' deviation of the control rows.
#'
#' @param dataset Data frame or tibble containing `treatment_group` and the
#'   column named by `outcome`.
#' @param outcome Name of the outcome column, as a string.
#' @param baseline_outcome Name of the baseline outcome column.
#' @param covariates Character vector of covariate column names.
#' @param standardize If `TRUE`, standardize `outcome` against the controls.
#' @return `dataset` with the `treat` column added, rows outside the two arms
#'   removed, and `outcome` standardized when `standardize` is `TRUE`.
compute_treatment_effects <- function(dataset, outcome, baseline_outcome,
                                      covariates,
                                      standardize) {
  # Not read again in this function; kept so both arguments are still forced.
  baseline_covariates <- c(baseline_outcome, covariates)

  dataset <- mutate(
    dataset,
    treat = ifelse(
      treatment_group == "trt",
      1,
      ifelse(treatment_group == "control", 0, NA)
    )
  )
  dataset <- filter(dataset, !is.na(treat))

  if (standardize) {
    # `[[` extracts the column as a vector for data.frames and tibbles alike,
    # so mean() and sd() never receive a one-column data frame.
    outcome_values <- dataset[[outcome]]
    control_values <- outcome_values[dataset$treat == 0]
    control_mean <- mean(control_values, na.rm = TRUE)
    control_sd <- sd(control_values, na.rm = TRUE)
    dataset[[outcome]] <- (outcome_values - control_mean) / control_sd
  }

  dataset
}
我有以下函数:
# Code a binary `treat` indicator from `treatment_group`, drop the rows that
# belong to neither arm, and optionally standardize the `outcome` column
# against the control arm (treat == 0).
#
# dataset: data frame with a `treatment_group` column ("trt" / "control").
# outcome: name of the column to standardize, given as a string.
# baseline_outcome, covariates: column names; they are only combined into
#   `baseline_covariates`, which the code shown here never reads again.
# standardize: when TRUE, subtract the control-arm mean from `outcome` and
#   divide by the control-arm standard deviation.
compute_treatment_effects <- function(dataset, outcome, baseline_outcome,
covariates,
standardize){
baseline_covariates <- c(baseline_outcome, covariates)
# "trt" -> 1, "control" -> 0, anything else -> NA; NA rows are then removed.
dataset <- dataset %>%
mutate(treat =ifelse(treatment_group == "trt", 1,
ifelse(treatment_group == "control", 0, NA))) %>%
filter(!is.na(treat))
if (standardize){
# NOTE(review): `dataset[, outcome]` collapses to a plain vector only for a
# base data.frame. On a tibble it stays a one-column data frame, so mean()
# and sd() receive a list-like object and fail with the error reported for
# this code. `dataset[[outcome]]` extracts a vector in both cases.
dataset[,outcome] <- (dataset[,outcome] - mean(dataset[dataset$treat==0,outcome], na.rm=TRUE))/
sd(dataset[dataset$treat==0,outcome], na.rm=TRUE)
}
# NOTE(review): there is no trailing `dataset` expression, so the function's
# value is that of the `if` statement, not an explicitly returned data frame.
}
现在的问题是,每当涉及标准化程序时,我都会收到错误消息:
“is.data.frame(x) 中的错误: 'list' 对象无法强制转换为 'double' 类型 另外: 警告信息: 在 mean.default(dataset[dataset$treat == 0, outcome], na.rm = TRUE) 中: 参数既不是数值也不是逻辑值: 返回 NA”
我真的不确定为什么会这样,我不认为语法有任何错误?
这是与代码一起使用的数据框示例:
# Example data: 22 rows; in every column the last ten values are 10.
dataframe <- data.frame(
  var1 = c(1, 2, 5, 1, 642, 5, 1, 2, 5, 9, NA, 8, rep(10, 10)),
  Var2 = c(1, 3, 5, 1, 642, 5, NA, NA, NA, NA, NA, NA, rep(10, 10)),
  var3 = c(1, 2, 635, 9, NA, 1, 2, 5, NA, NA, 12, NA, rep(10, 10)),
  var4 = c(1, 21, 15, 19, NA, 1, 26656, 56, 6, NA, 512, NA, rep(10, 10)),
  cov1 = c(1, 22, 335, 29, NA, NA, NA, 645, NA, NA, 12, NA, rep(10, 10)),
  cov2 = c(44251, 2322, 5, 29, 45, 35, 42, 645, 55, 525, NA, NA, rep(10, 10)),
  cov3 = c(154, 2552, 35, 53529, 5, 3, 53542, 645, 25, 2, 12, 23, rep(10, 10))
)

# Rows with var3 == 2 form the treated arm, var3 == 10 the control arm;
# every other row gets NA.
dataframe <- mutate(
  dataframe,
  treatment_group = ifelse(var3 == 2, "trt", ifelse(var3 == 10, "control", NA))
)

# Arguments used when calling compute_treatment_effects().
dataset <- dataframe
outcome <- "Var2"
baseline_outcome <- "var1"
covariates <- c("cov1", "cov2", "cov3")
非常感谢!!!
OP 的原始数据集可能是 tibble 或 data.table,
因为用 [, column] 取单列时,data.frame 默认 drop = TRUE(返回向量),
而 tibble 和 data.table 默认都是 drop = FALSE(返回的仍是只有一列的数据框),
于是 mean() 得到的是列表而不是数值向量:
> compute_treatment_effects(as_tibble(dataset), outcome, baseline_outcome, covariates, standardize = TRUE)
Error in is.data.frame(x) : 'list' object cannot be coerced to type 'double' In addition: Warning message: In mean.default(dataset[dataset$treat == 0, outcome], na.rm = TRUE) : argument is not numeric or logical: returning NA
一种解决方法是先用 as.data.frame 将数据集转换为 data.frame
compute_treatment_effects(as.data.frame(dataset), outcome, baseline_outcome, covariates, standardize = TRUE)
-输出
var1 Var2 var3 var4 cov1 cov2 cov3 treatment_group treat
1 2 -Inf 2 21 22 2322 2552 trt 1
2 1 NA 2 26656 NA 42 53542 trt 1
3 10 NaN 10 10 10 10 10 control 0
4 10 NaN 10 10 10 10 10 control 0
5 10 NaN 10 10 10 10 10 control 0
6 10 NaN 10 10 10 10 10 control 0
7 10 NaN 10 10 10 10 10 control 0
8 10 NaN 10 10 10 10 10 control 0
9 10 NaN 10 10 10 10 10 control 0
10 10 NaN 10 10 10 10 10 control 0
11 10 NaN 10 10 10 10 10 control 0
12 10 NaN 10 10 10 10 10 control 0
或者修改函数,使用 [[ 而不是 [ 来提取列,即
#' Standardize an outcome column against the control arm
#'
#' Adds a numeric `treat` indicator (1 for "trt", 0 for "control") derived
#' from `treatment_group`, keeps only the rows that belong to one of those two
#' arms and, when requested, rescales `outcome` with the mean and standard
#' deviation of the control rows.
#'
#' @param dataset Data frame or tibble containing `treatment_group` and the
#'   column named by `outcome`.
#' @param outcome Name of the outcome column, as a string.
#' @param baseline_outcome Name of the baseline outcome column.
#' @param covariates Character vector of covariate column names.
#' @param standardize If `TRUE`, standardize `outcome` against the controls.
#' @return `dataset` with the `treat` column added, rows outside the two arms
#'   removed, and `outcome` standardized when `standardize` is `TRUE`.
compute_treatment_effects <- function(dataset, outcome, baseline_outcome,
                                      covariates,
                                      standardize) {
  # Not read again in this function; kept so both arguments are still forced.
  baseline_covariates <- c(baseline_outcome, covariates)

  dataset <- mutate(
    dataset,
    treat = ifelse(
      treatment_group == "trt",
      1,
      ifelse(treatment_group == "control", 0, NA)
    )
  )
  dataset <- filter(dataset, !is.na(treat))

  if (standardize) {
    # `[[` extracts the column as a vector for data.frames and tibbles alike,
    # so mean() and sd() never receive a one-column data frame.
    outcome_values <- dataset[[outcome]]
    control_values <- outcome_values[dataset$treat == 0]
    control_mean <- mean(control_values, na.rm = TRUE)
    control_sd <- sd(control_values, na.rm = TRUE)
    dataset[[outcome]] <- (outcome_values - control_mean) / control_sd
  }

  dataset
}