使用 lapply 在多个数据集中生成新变量(仅当该变量尚不存在时)
Using lapply to generate new variables across data sets, conditional on not existing
假设我有三个数据集:
# Three example data sets; only df3 already carries a var3 column.
df1 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df2 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df3 <- data.frame(
  var1 = c(1, 2, 3),
  var2 = c(1, 2, 3),
  var3 = c(1, 2, 3)
)
我想检查每个数据集中是否存在变量 "var3"。如果没有,我想生成一个名为 "var3" 的空变量。这是我正在尝试的:
# Collect the three data frames in a list so they can be processed together.
dframes <- list(df1,df2,df3)
# NOTE: this attempt does not add var3 to the data frames. The assignment
# df$var3 <- NA only changes the function's local copy of df, and the function
# returns the value of ifelse() (NA or "var3 exists") rather than df itself.
lapply(dframes, function(df) {
ifelse("var3" %in% colnames(df), print("var3 exists"), df$var3 <- NA)
})
输出结果为:
[[1]]
[1] NA
[[2]]
[1] NA
[[3]]
[1] "var3 exists"
并且没有为前两个数据集生成所需的 "var3" 变量 - 它们仍然只包含 "var1" 和 "var2"。
感谢您的帮助。
下面把大家的建议整理成一个完整的答案。原代码的问题在于:函数内部对 df 的修改只作用于局部副本,而且函数返回的是 ifelse() 的结果而不是 df。应改用 if,并在函数末尾返回 df:
# Example data: df1 and df2 lack var3, df3 already has it.
df1 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df2 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df3 <- data.frame(
  var1 = c(1, 2, 3),
  var2 = c(1, 2, 3),
  var3 = c(1, 2, 3)
)
dframes <- list(df1, df2, df3)
# The function returns the data frame in every case, so lapply() collects the
# updated copies; var3 is filled with NA only where it is missing.
dfframes_fmt <- lapply(dframes, function(frame) {
  if ("var3" %in% colnames(frame)) {
    return(frame)
  }
  frame$var3 <- NA
  frame
})
> dfframes_fmt
[[1]]
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
[[2]]
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
[[3]]
var1 var2 var3
1 1 1 1
2 2 2 2
3 3 3 3
如果想把结果写回原来的变量名(df1、df2、df3),可以这样做:
dfnames <- c("df1", "df2", "df3")
# Assemble the list of data frames by looking each name up directly.
# mget() replaces eval(parse(text = ...)), so no code is built from strings;
# unname() keeps the list unnamed, as the original list(df1, df2, df3) was.
dframes <- unname(mget(dfnames, inherits = TRUE))
for (k in seq_along(dframes)) {
  set <- dframes[[k]]
  # Add an all-NA var3 column only when the data frame does not have one yet.
  if (!"var3" %in% colnames(set)) {
    set$var3 <- NA
  }
  # Assign the data frame back to its original name in the current environment.
  assign(dfnames[k], set)
}
> df1
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
> df2
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
> df3
var1 var2 var3
1 1 1 1
2 2 2 2
3 3 3 3
假设我有三个数据集:
# Three example data sets; only df3 already carries a var3 column.
df1 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df2 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df3 <- data.frame(
  var1 = c(1, 2, 3),
  var2 = c(1, 2, 3),
  var3 = c(1, 2, 3)
)
我想检查每个数据集中是否存在变量 "var3"。如果没有,我想生成一个名为 "var3" 的空变量。这是我正在尝试的:
# Collect the three data frames in a list so they can be processed together.
dframes <- list(df1,df2,df3)
# NOTE: this attempt does not add var3 to the data frames. The assignment
# df$var3 <- NA only changes the function's local copy of df, and the function
# returns the value of ifelse() (NA or "var3 exists") rather than df itself.
lapply(dframes, function(df) {
ifelse("var3" %in% colnames(df), print("var3 exists"), df$var3 <- NA)
})
输出结果为:
[[1]]
[1] NA
[[2]]
[1] NA
[[3]]
[1] "var3 exists"
并且没有为前两个数据集生成所需的 "var3" 变量 - 它们仍然只包含 "var1" 和 "var2"。
感谢您的帮助。
下面把大家的建议整理成一个完整的答案。原代码的问题在于:函数内部对 df 的修改只作用于局部副本,而且函数返回的是 ifelse() 的结果而不是 df。应改用 if,并在函数末尾返回 df:
# Example data: df1 and df2 lack var3, df3 already has it.
df1 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df2 <- data.frame(var1 = c(1, 2, 3), var2 = c(1, 2, 3))
df3 <- data.frame(
  var1 = c(1, 2, 3),
  var2 = c(1, 2, 3),
  var3 = c(1, 2, 3)
)
dframes <- list(df1, df2, df3)
# The function returns the data frame in every case, so lapply() collects the
# updated copies; var3 is filled with NA only where it is missing.
dfframes_fmt <- lapply(dframes, function(frame) {
  if ("var3" %in% colnames(frame)) {
    return(frame)
  }
  frame$var3 <- NA
  frame
})
> dfframes_fmt
[[1]]
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
[[2]]
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
[[3]]
var1 var2 var3
1 1 1 1
2 2 2 2
3 3 3 3
如果想把结果写回原来的变量名(df1、df2、df3),可以这样做:
dfnames <- c("df1", "df2", "df3")
# Assemble the list of data frames by looking each name up directly.
# mget() replaces eval(parse(text = ...)), so no code is built from strings;
# unname() keeps the list unnamed, as the original list(df1, df2, df3) was.
dframes <- unname(mget(dfnames, inherits = TRUE))
for (k in seq_along(dframes)) {
  set <- dframes[[k]]
  # Add an all-NA var3 column only when the data frame does not have one yet.
  if (!"var3" %in% colnames(set)) {
    set$var3 <- NA
  }
  # Assign the data frame back to its original name in the current environment.
  assign(dfnames[k], set)
}
> df1
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
> df2
var1 var2 var3
1 1 1 NA
2 2 2 NA
3 3 3 NA
> df3
var1 var2 var3
1 1 1 1
2 2 2 2
3 3 3 3