从单独的 data.frames 中计算不同列的汇总统计信息

Calculate summary statistics of different columns from separate data.frames

我是 R 的新手。我有一个目录,其中包含许多结构相同的数据文件(每个文件 3 列,以“,”分隔),各列依次为:日期和时间戳、V2、V3。

我需要计算所有文件中 V2 和 V3 列的均值、中位数、标准差(SD)、方差和偏度,然后将这 10 列保存到一个单独的文件中。每一列(例如 V2 中位数)将包含各个文件的 V2 列的中位数。希望能得到大家的帮助。

谢谢

如果我对你的问题理解正确,这里有一个使用 data.table 的快速解决方案。

# Load library
  library(data.table)
  library(moments)
  library(readr)



# Get a list of the `.csv` files in your folder.
# `pattern` is a regular expression, not a shell glob, so the extension is
# matched with an escaped dot anchored at the end of the file name.
filenames <- list.files(
  "C:/your/folder",
  pattern = "\\.csv$",
  full.names = TRUE
)

# Fail early with a clear message instead of an obscure "object not found"
# error further down when the folder contains no matching files.
if (length(filenames) == 0L) {
  stop("No .csv files found in the input folder", call. = FALSE)
}

# Load every file and bind them into one single data.table
df <- rbindlist(lapply(filenames, fread))

# Summary statistics for V2 and V3, pooled over the rows of all files
# (the result is a single row with 10 columns). Missing values are dropped
# so that one NA does not turn a whole statistic into NA.
# For one row per file instead, bind with
# `rbindlist(lapply(filenames, fread), idcol = "file")` and add `by = file`.
output <- df[, .(
  V2_mean = mean(V2, na.rm = TRUE),
  V2_median = median(V2, na.rm = TRUE),
  V2_SD = sd(V2, na.rm = TRUE),
  V2_var = var(V2, na.rm = TRUE),
  V2_skw = skewness(V2, na.rm = TRUE),
  V3_mean = mean(V3, na.rm = TRUE),
  V3_median = median(V3, na.rm = TRUE),
  V3_SD = sd(V3, na.rm = TRUE),
  V3_var = var(V3, na.rm = TRUE),
  V3_skw = skewness(V3, na.rm = TRUE)
)]

# Save the output in a separate file (written to the current working directory)
write_csv(output, "output.csv")

另一种方法(对每个文件分别计算统计量,每个文件对应结果中的一行):

#This library is needed to compute skewness
library(e1071) 

# Set the directory where your files are present as the working directory
setwd("Directory where your files are present")

# Name of the results file; it is written into the same directory as the inputs
output_name <- "file_stats.csv"

# Collect the input CSV files (extension matched case-insensitively).
# The dot is escaped so that only a real ".csv" extension matches, and the
# results file is excluded so that re-running the script does not read a
# previous output as if it were input data.
files <- list.files(path = ".", pattern = "\\.csv$", ignore.case = TRUE)
files <- files[tolower(files) != tolower(output_name)]
if (length(files) == 0L) {
  stop("No CSV files found in ", getwd(), call. = FALSE)
}

# Column names of the result: five statistics for each of the two columns
stat_names <- c(
  "v2_mean", "v2_median", "v2_SD", "v2_variance", "v2_Skew",
  "v3_mean", "v3_median", "v3_SD", "v3_variance", "v3_Skew"
)

# Mean, median, SD, variance and skewness of one vector, ignoring NAs
column_stats <- function(x) {
  c(
    mean(x, na.rm = TRUE),
    median(x, na.rm = TRUE),
    sd(x, na.rm = TRUE),
    var(x, na.rm = TRUE),
    skewness(x, na.rm = TRUE)
  )
}

# Read each file and compute the statistics of its 2nd and 3rd columns.
# vapply() returns a 10 x n matrix (one column per file), which avoids
# growing a data frame with rbind() inside a loop.
stats <- vapply(
  files,
  function(path) {
    current_file <- read.table(path, header = TRUE, sep = ",")
    c(column_stats(current_file[[2]]), column_stats(current_file[[3]]))
  },
  numeric(length(stat_names)),
  USE.NAMES = FALSE
)

# One row per input file, one column per statistic
file <- as.data.frame(t(stats))
names(file) <- stat_names

# The final file is saved in the working directory set above
write.csv(file, output_name)