如何在 R 中循环调用不同的数据框,并向它们添加不同的计算结果?
How to call up different dataframes in loops in r and add to them different computational results?
如何自动执行以下步骤?
下面是我想实现的效果的示例:最终得到一个数据框,它由前面步骤中自动生成的若干较小数据框合并而成。这些较小的数据框在合并之前还需要各自完成计算。我可以用一个很长的脚本手动完成所有操作,但始终没弄清楚该如何正确组合 list()、apply() 或 for() 循环来得到想要的结果(也不确定这些是否是这里的最佳选择)。
请指教。
谢谢!
########### 我在详细代码中的问题
# DATASET
a <- c(2.0, 2.4, 2.1, 2.2, 2.3)
b <- c(4.0, 0, 4.5, 4.4, 4.8)
c <- c(0.3, 0.2, 2.0, 2.1, 2.3)
d <- c(5.0, 4.8, 4.8, 4.9, 5.0)
# Each vector becomes one row (a-d); the five positions become columns X1-X5.
test.data <- data.frame(rbind(a, b, c, d))

# STEP 1: create separate dfs and do different calculations by column in each
# LONG WAY, MANUAL
# dfK holds the % difference of every value with respect to the K-th value in
# its row (first value for df1, second value for df2, etc.).
nc <- ncol(test.data)
df1 <- (test.data - test.data[[1]]) / test.data[[1]] * 100
df2 <- (test.data - test.data[[2]]) / test.data[[2]] * 100
df3 <- (test.data - test.data[[3]]) / test.data[[3]] * 100
df4 <- (test.data - test.data[[4]]) / test.data[[4]] * 100
df5 <- (test.data - test.data[[5]]) / test.data[[5]] * 100

# Dividing by a zero reference value gives Inf or -Inf; set both to NA.
# is.infinite() does not accept a data frame, hence the as.matrix() step.
df1[is.infinite(as.matrix(df1))] <- NA
df2[is.infinite(as.matrix(df2))] <- NA
df3[is.infinite(as.matrix(df3))] <- NA
df4[is.infinite(as.matrix(df4))] <- NA
df5[is.infinite(as.matrix(df5))] <- NA

# Next, flag each %-value that lies strictly inside +/- percent.diff
# (1 = inside, 0 = outside, NA = not computable) and save the results in
# separate associated dataframes.
percent.diff <- 30
df.A1 <- data.frame(ifelse(df1 > -percent.diff & df1 < percent.diff, 1, 0))
df.A2 <- data.frame(ifelse(df2 > -percent.diff & df2 < percent.diff, 1, 0))
df.A3 <- data.frame(ifelse(df3 > -percent.diff & df3 < percent.diff, 1, 0))
df.A4 <- data.frame(ifelse(df4 > -percent.diff & df4 < percent.diff, 1, 0))
df.A5 <- data.frame(ifelse(df5 > -percent.diff & df5 < percent.diff, 1, 0))

# Next, add ID columns to each of the newly created dataframes: df.cat is the
# index of the reference column, row is the observation number.
# The number of observations is taken from the data rather than hard-coded.
obs <- nrow(test.data)
df.A1["df.cat"] <- 1
df.A1["row"] <- seq_len(obs)
df.A2["df.cat"] <- 2
df.A2["row"] <- seq_len(obs)
df.A3["df.cat"] <- 3
df.A3["row"] <- seq_len(obs)
df.A4["df.cat"] <- 4
df.A4["row"] <- seq_len(obs)
df.A5["df.cat"] <- 5
df.A5["row"] <- seq_len(obs)

# Combine the individual dataframes with IDs into a single dataframe.
Combo.df <- list(df.A1, df.A2, df.A3, df.A4, df.A5)
All.df <- do.call(rbind, Combo.df)
最终输出应如下所示(仅显示前几行)
X1 X2 X3 X4 X5 df.cat row
a 1 1 1 1 1 1 1
b 1 0 1 1 1 1 2
c 1 0 0 0 0 1 3
d 1 1 1 1 1 1 4
a1 1 1 1 1 1 2 1
b1 1 1 1 1 1 2 2
c1 0 1 0 0 0 2 3
d1 1 1 1 1 1 2 4
a2 1 1 1 1 1 3 1
b2 1 0 1 1 1 3 2
c2 0 0 1 1 1 3 3
d2 1 1 1 1 1 3 4
尝试自动执行上述步骤失败
# a) created the number of dataframes I will need
# Dimensions of the placeholder data frames created below.
num.reps <- 5
obs <- 4
n.cols <- 5
# Build num.reps data frames of obs x n.cols NA values and keep them as a list
# (simplify = FALSE stops replicate() from collapsing the result).
lst <- replicate(num.reps,data.frame(matrix(NA, nrow = obs, ncol = n.cols)), simplify=FALSE)
# Name the list elements df1, df2, ... up to num.reps.
names(lst) <- paste0('df', 1:num.reps)
# Copy every list element into the global environment under its name.
list2env(lst, envir = .GlobalEnv)
# b) fill dataframes (not sure how to call up dataframe by sequential names in loop)
# THIS DOES NOT WORK
# NOTE(review): no object named `df` is assigned anywhere in this snippet
# (only df1, df2, ... are created above), yet the body indexes `df`.
# The assignment to df[[i]] also happens inside the function, so it would only
# change a local copy and never the data frames in the global environment.
f.diff.calc <- function(i)
{df[[i]] <-(df[,1:nc] - df[[i]])/(df[[i]])*100}
# NOTE(review): the value passed as `i` is `list` -- presumably the base
# function, since no variable of that name is assigned in this snippet -- not a
# column index or a list of data frames; replicate() repeats that same call
# five times.
diff.calc.list <- replicate(5, f.diff.calc(list))
# Error message reported for the call above:
#Error in `[.data.frame`(df, , 1:nc) : undefined columns selected
这是对您的代码的简化,据我所知,它可以满足您的要求。
# Percent difference of every value in DF relative to the value that column
# `col` holds in the same row.
#   col: index (or name) of the reference column, used as DF[[col]].
#   DF:  data frame of numeric columns; defaults to test.data.
# Returns a data frame shaped like DF. Infinite results (division by a zero
# reference value) are replaced by NA; NaN results are left as they are.
fun1 <- function(col, DF = test.data) {
  reference <- DF[[col]]
  pct <- 100 * (DF - reference) / reference
  # is.infinite() does not accept a data frame, so build the mask on a matrix.
  infinite_cells <- is.infinite(as.matrix(pct))
  pct[infinite_cells] <- NA
  pct
}
# Flag which values of DF lie strictly inside (-percent.diff, percent.diff).
#   DF:           data frame of percent differences.
#   percent.diff: half-width of the accepted band (default 30).
# Returns a data frame with the same row and column names holding 1 where the
# value is inside the band, 0 where it is not, and NA where the value is
# missing.
fun2 <- function(DF, percent.diff = 30) {
  inside_band <- DF > -percent.diff & DF < percent.diff
  flags <- ifelse(inside_band, 1, 0)
  data.frame(flags)
}
# One percent-difference table per reference column of test.data, collected in
# a list named df1, df2, ...
df_list <- lapply(seq_len(ncol(test.data)), fun1)
names(df_list) <- paste0("df", seq_along(df_list))

# Next, filter each calculated %-value by the specified percent difference
# and save the 1/0 results in a list of dataframes.
percent.diff <- 30
# Pass the threshold explicitly; otherwise fun2 falls back to its own default
# and changing percent.diff above would have no effect.
df.A_list <- lapply(df_list, fun2, percent.diff = percent.diff)

# Next, add ID columns to each of the newly created dataframes: df.cat is the
# position of the dataframe in the list, row is the observation number.
# Map() keeps the names of its first list argument, so df1, df2, ... survive.
df.A_list <- Map(function(flags, i) {
  flags[["df.cat"]] <- i
  flags[["row"]] <- seq_len(nrow(flags))
  flags
}, df.A_list, seq_along(df.A_list))

# Combine the results in one dataframe
All.df <- do.call(rbind, df.A_list)
好吧,我真诚地认为通过更多的研究你可以解决它。此外,我无法重新创建您获得的确切输出,但我能够匹配使用您的代码获得的输出。
这是代码的自动化版本。
# Example data: each vector becomes one row (a-d) of test.data and the five
# positions become columns X1-X5.
a <- c(2.0, 2.4, 2.1, 2.2, 2.3)
b <- c(4.0, 0, 4.5, 4.4, 4.8)
c <- c(0.3, 0.2, 2.0, 2.1, 2.3)
d <- c(5.0, 4.8, 4.8, 4.9, 5.0)
row_matrix <- rbind(a, b, c, d)
test.data <- data.frame(row_matrix)
rm(row_matrix)
# STEP 1: one percent-difference table is built per column of test.data
# (relative to the first value in each row, then the second value, etc.).
# nc is the number of such columns.
nc <- length(test.data)
# Turn a table of percent differences into 1/0 flags plus ID columns.
#   x:            data frame of percent differences.
#   percent.diff: half-width of the accepted band; a value is flagged 1 when it
#                 lies strictly inside (-percent.diff, percent.diff), else 0.
#   i:            identifier stored in the df.cat column of every row.
# Returns a data frame with the flag columns of x, followed by df.cat (= i)
# and row (1 .. number of rows of x). Infinite inputs (Inf and -Inf) and other
# missing values yield NA flags.
calc <- function(x, percent.diff = 30, i) {
  # is.infinite() catches -Inf as well as Inf; it does not accept a data
  # frame, so the mask is built on a matrix.
  x[is.infinite(as.matrix(x))] <- NA
  x.A <- data.frame(ifelse(x > -percent.diff & x < percent.diff, 1, 0))
  # Size the ID columns from the data instead of assuming exactly 4 rows.
  n_rows <- nrow(x.A)
  x.A$df.cat <- rep(i, n_rows)
  x.A$row <- seq_len(n_rows)
  x.A
}
# Percent difference of every value relative to column i of its row, for each
# of the nc columns, collected in a list named df1, df2, ...
pct_list <- lapply(seq_len(nc), function(i) {
  (test.data - test.data[[i]]) / test.data[[i]] * 100
})
names(pct_list) <- paste0("df", seq_len(nc))
# Keep df1, df2, ... available as individual variables as well.
invisible(list2env(pct_list, envir = environment()))

# Flag and label each table, then stack them once instead of growing the
# result with rbind() inside a loop. The list is left unnamed so the row names
# stay a, b, c, d, a1, b1, ...
flag_list <- lapply(seq_len(nc), function(i) {
  calc(x = pct_list[[i]], percent.diff = 30, i = i)
})
output <- do.call(rbind, flag_list)
如何自动执行以下步骤? 下面是我想实现的效果的示例:最终得到一个数据框,它由前面步骤中自动生成的若干较小数据框合并而成。这些较小的数据框在合并之前还需要各自完成计算。我可以用一个很长的脚本手动完成所有操作,但始终没弄清楚该如何正确组合 list()、apply() 或 for() 循环来得到想要的结果(也不确定这些是否是这里的最佳选择)。请指教。谢谢!
########### 我在详细代码中的问题# DATASET
a <- c(2.0, 2.4, 2.1, 2.2, 2.3)
b <- c(4.0, 0, 4.5, 4.4, 4.8)
c <- c(0.3, 0.2, 2.0, 2.1, 2.3)
d <- c(5.0, 4.8, 4.8, 4.9, 5.0)
# Each vector becomes one row (a-d); the five positions become columns X1-X5.
test.data <- data.frame(rbind(a, b, c, d))

# STEP 1: create separate dfs and do different calculations by column in each
# LONG WAY, MANUAL
# dfK holds the % difference of every value with respect to the K-th value in
# its row (first value for df1, second value for df2, etc.).
nc <- ncol(test.data)
df1 <- (test.data - test.data[[1]]) / test.data[[1]] * 100
df2 <- (test.data - test.data[[2]]) / test.data[[2]] * 100
df3 <- (test.data - test.data[[3]]) / test.data[[3]] * 100
df4 <- (test.data - test.data[[4]]) / test.data[[4]] * 100
df5 <- (test.data - test.data[[5]]) / test.data[[5]] * 100

# Dividing by a zero reference value gives Inf or -Inf; set both to NA.
# is.infinite() does not accept a data frame, hence the as.matrix() step.
df1[is.infinite(as.matrix(df1))] <- NA
df2[is.infinite(as.matrix(df2))] <- NA
df3[is.infinite(as.matrix(df3))] <- NA
df4[is.infinite(as.matrix(df4))] <- NA
df5[is.infinite(as.matrix(df5))] <- NA

# Next, flag each %-value that lies strictly inside +/- percent.diff
# (1 = inside, 0 = outside, NA = not computable) and save the results in
# separate associated dataframes.
percent.diff <- 30
df.A1 <- data.frame(ifelse(df1 > -percent.diff & df1 < percent.diff, 1, 0))
df.A2 <- data.frame(ifelse(df2 > -percent.diff & df2 < percent.diff, 1, 0))
df.A3 <- data.frame(ifelse(df3 > -percent.diff & df3 < percent.diff, 1, 0))
df.A4 <- data.frame(ifelse(df4 > -percent.diff & df4 < percent.diff, 1, 0))
df.A5 <- data.frame(ifelse(df5 > -percent.diff & df5 < percent.diff, 1, 0))

# Next, add ID columns to each of the newly created dataframes: df.cat is the
# index of the reference column, row is the observation number.
# The number of observations is taken from the data rather than hard-coded.
obs <- nrow(test.data)
df.A1["df.cat"] <- 1
df.A1["row"] <- seq_len(obs)
df.A2["df.cat"] <- 2
df.A2["row"] <- seq_len(obs)
df.A3["df.cat"] <- 3
df.A3["row"] <- seq_len(obs)
df.A4["df.cat"] <- 4
df.A4["row"] <- seq_len(obs)
df.A5["df.cat"] <- 5
df.A5["row"] <- seq_len(obs)

# Combine the individual dataframes with IDs into a single dataframe.
Combo.df <- list(df.A1, df.A2, df.A3, df.A4, df.A5)
All.df <- do.call(rbind, Combo.df)
最终输出应如下所示(仅显示前几行)
X1 X2 X3 X4 X5 df.cat row
a 1 1 1 1 1 1 1
b 1 0 1 1 1 1 2
c 1 0 0 0 0 1 3
d 1 1 1 1 1 1 4
a1 1 1 1 1 1 2 1
b1 1 1 1 1 1 2 2
c1 0 1 0 0 0 2 3
d1 1 1 1 1 1 2 4
a2 1 1 1 1 1 3 1
b2 1 0 1 1 1 3 2
c2 0 0 1 1 1 3 3
d2 1 1 1 1 1 3 4
尝试自动执行上述步骤失败
# a) created the number of dataframes I will need
# Dimensions of the placeholder data frames created below.
num.reps <- 5
obs <- 4
n.cols <- 5
# Build num.reps data frames of obs x n.cols NA values and keep them as a list
# (simplify = FALSE stops replicate() from collapsing the result).
lst <- replicate(num.reps,data.frame(matrix(NA, nrow = obs, ncol = n.cols)), simplify=FALSE)
# Name the list elements df1, df2, ... up to num.reps.
names(lst) <- paste0('df', 1:num.reps)
# Copy every list element into the global environment under its name.
list2env(lst, envir = .GlobalEnv)
# b) fill dataframes (not sure how to call up dataframe by sequential names in loop)
# THIS DOES NOT WORK
# NOTE(review): no object named `df` is assigned anywhere in this snippet
# (only df1, df2, ... are created above), yet the body indexes `df`.
# The assignment to df[[i]] also happens inside the function, so it would only
# change a local copy and never the data frames in the global environment.
f.diff.calc <- function(i)
{df[[i]] <-(df[,1:nc] - df[[i]])/(df[[i]])*100}
# NOTE(review): the value passed as `i` is `list` -- presumably the base
# function, since no variable of that name is assigned in this snippet -- not a
# column index or a list of data frames; replicate() repeats that same call
# five times.
diff.calc.list <- replicate(5, f.diff.calc(list))
# Error message reported for the call above:
#Error in `[.data.frame`(df, , 1:nc) : undefined columns selected
这是对您的代码的简化,据我所知,它可以满足您的要求。
# Percent difference of every value in DF relative to the value that column
# `col` holds in the same row.
#   col: index (or name) of the reference column, used as DF[[col]].
#   DF:  data frame of numeric columns; defaults to test.data.
# Returns a data frame shaped like DF. Infinite results (division by a zero
# reference value) are replaced by NA; NaN results are left as they are.
fun1 <- function(col, DF = test.data) {
  reference <- DF[[col]]
  pct <- 100 * (DF - reference) / reference
  # is.infinite() does not accept a data frame, so build the mask on a matrix.
  infinite_cells <- is.infinite(as.matrix(pct))
  pct[infinite_cells] <- NA
  pct
}
# Flag which values of DF lie strictly inside (-percent.diff, percent.diff).
#   DF:           data frame of percent differences.
#   percent.diff: half-width of the accepted band (default 30).
# Returns a data frame with the same row and column names holding 1 where the
# value is inside the band, 0 where it is not, and NA where the value is
# missing.
fun2 <- function(DF, percent.diff = 30) {
  inside_band <- DF > -percent.diff & DF < percent.diff
  flags <- ifelse(inside_band, 1, 0)
  data.frame(flags)
}
# One percent-difference table per reference column of test.data, collected in
# a list named df1, df2, ...
df_list <- lapply(seq_len(ncol(test.data)), fun1)
names(df_list) <- paste0("df", seq_along(df_list))

# Next, filter each calculated %-value by the specified percent difference
# and save the 1/0 results in a list of dataframes.
percent.diff <- 30
# Pass the threshold explicitly; otherwise fun2 falls back to its own default
# and changing percent.diff above would have no effect.
df.A_list <- lapply(df_list, fun2, percent.diff = percent.diff)

# Next, add ID columns to each of the newly created dataframes: df.cat is the
# position of the dataframe in the list, row is the observation number.
# Map() keeps the names of its first list argument, so df1, df2, ... survive.
df.A_list <- Map(function(flags, i) {
  flags[["df.cat"]] <- i
  flags[["row"]] <- seq_len(nrow(flags))
  flags
}, df.A_list, seq_along(df.A_list))

# Combine the results in one dataframe
All.df <- do.call(rbind, df.A_list)
好吧,我真诚地认为通过更多的研究你可以解决它。此外,我无法重新创建您获得的确切输出,但我能够匹配使用您的代码获得的输出。
这是代码的自动化版本。
# Example data: each vector becomes one row (a-d) of test.data and the five
# positions become columns X1-X5.
a <- c(2.0, 2.4, 2.1, 2.2, 2.3)
b <- c(4.0, 0, 4.5, 4.4, 4.8)
c <- c(0.3, 0.2, 2.0, 2.1, 2.3)
d <- c(5.0, 4.8, 4.8, 4.9, 5.0)
row_matrix <- rbind(a, b, c, d)
test.data <- data.frame(row_matrix)
rm(row_matrix)
# STEP 1: one percent-difference table is built per column of test.data
# (relative to the first value in each row, then the second value, etc.).
# nc is the number of such columns.
nc <- length(test.data)
# Turn a table of percent differences into 1/0 flags plus ID columns.
#   x:            data frame of percent differences.
#   percent.diff: half-width of the accepted band; a value is flagged 1 when it
#                 lies strictly inside (-percent.diff, percent.diff), else 0.
#   i:            identifier stored in the df.cat column of every row.
# Returns a data frame with the flag columns of x, followed by df.cat (= i)
# and row (1 .. number of rows of x). Infinite inputs (Inf and -Inf) and other
# missing values yield NA flags.
calc <- function(x, percent.diff = 30, i) {
  # is.infinite() catches -Inf as well as Inf; it does not accept a data
  # frame, so the mask is built on a matrix.
  x[is.infinite(as.matrix(x))] <- NA
  x.A <- data.frame(ifelse(x > -percent.diff & x < percent.diff, 1, 0))
  # Size the ID columns from the data instead of assuming exactly 4 rows.
  n_rows <- nrow(x.A)
  x.A$df.cat <- rep(i, n_rows)
  x.A$row <- seq_len(n_rows)
  x.A
}
# Percent difference of every value relative to column i of its row, for each
# of the nc columns, collected in a list named df1, df2, ...
pct_list <- lapply(seq_len(nc), function(i) {
  (test.data - test.data[[i]]) / test.data[[i]] * 100
})
names(pct_list) <- paste0("df", seq_len(nc))
# Keep df1, df2, ... available as individual variables as well.
invisible(list2env(pct_list, envir = environment()))

# Flag and label each table, then stack them once instead of growing the
# result with rbind() inside a loop. The list is left unnamed so the row names
# stay a, b, c, d, a1, b1, ...
flag_list <- lapply(seq_len(nc), function(i) {
  calc(x = pct_list[[i]], percent.diff = 30, i = i)
})
output <- do.call(rbind, flag_list)