从 R 中的目录读取多个文本文件时出错
Error in reading multiple text files from a directory in R
我想从我的目录中读取多个文本文件,这些文件按以下格式排列
regional_vol_GM_atlas1.txt
regional_vol_GM_atlas2.txt
........
regional_vol_GM_atlas152.txt
文件中的数据采用以下格式
667869 667869
580083 580083
316133 316133
3631 3631
以下是我写的脚本
# Packages attached by the asker; the lines below only call dir(), list.files(),
# read.table() and cbind(), none of which come from these packages.
library(readr)
library(stringr)
library(data.table)
# Accumulator: each loop iteration appends the extracted row with cbind().
array <- c()
# NOTE(review): the directory path below is not wrapped in quotes, so it is not a
# character string and R cannot parse this line as written.
for (file in dir(/media/dev/Daten/Task1/subject1/t1)) # path to the directory where .txt files are located
{
# NOTE(review): `file=` is given the whole vector returned by list.files() instead of
# the loop variable `file`; read.table() expects a single file name, which is
# presumably what triggers the reported "invalid 'description' argument" error.
# list.files() is also called without a path, so it searches the working directory,
# and `pattern` is interpreted as a regular expression rather than a shell glob.
row4 <- read.table(file=list.files(pattern ="regional_vol*.txt"),
header = FALSE,
row.names = NULL,
skip = 3, # Skip the 1st 3 rows
nrows = 1, # Read only the next row after skipping the 1st 3 rows
sep = "\t") # change the separator if it is not "\t"
# Append this iteration's row to the accumulator as additional columns.
array <- cbind(array, row4)
}
我遇到了以下错误
Error in file(file, "rt") : invalid 'description' argument
请指出我在脚本中的错误之处
这对我来说似乎很好用。如果文件有 headers,请根据代码注释进行更改:
[已编辑答案以反映 OP 发布的新信息]
# Collect line 4 of every regional_vol*.txt file in `mydir` into one data frame.
# rm(list=ls()) #clean memory if you can afford to
mydir <- "~/Desktop/a" # change as per your path

# Read full paths. `pattern` is a regular expression (not a shell glob), so the
# dot is escaped and the expression is anchored at both ends.
myfiles <- list.files(mydir, pattern = "^regional_vol.*\\.txt$", full.names = TRUE)
# Fail early with a clear message instead of a cryptic read error on NA.
if (length(myfiles) == 0) {
  stop("no regional_vol*.txt files found in ", mydir, call. = FALSE)
}
myfiles # check that files listed correctly
# Note: list.files() sorts alphabetically, so atlas10 is listed before atlas2.

# Read the first four lines of one file and keep only the fourth.
# Change header = TRUE/FALSE depending on presence of a header, and make sure
# sep is correct ("" means any run of whitespace).
# drop = FALSE keeps the result a data frame even for single-column files.
read_row4 <- function(path) {
  read.csv(path, header = FALSE, skip = 0, nrows = 4, sep = "")[-(1:3), , drop = FALSE]
}

# initialise the dataframe from first file
df <- read_row4(myfiles[1])
# check that first line was read correctly
df

# read the same line from all the other files
ans <- lapply(myfiles[-1], read_row4)
ans

# Bind everything in a single rbind() call; this avoids `<<-` and growing the
# data frame one row at a time.
df <- do.call(rbind, c(list(df), ans))
# this should be the required dataframe
df
此外,如果您使用 Linux,一个更简单的方法就是让 OS 为您完成
# FNR is awk's per-file line counter, so this prints line 4 of every matching file.
awk 'FNR == 4' regional_vol*.txt
这应该可以为您完成。
# Combine every .txt file in one directory into a single data frame and save it.

# Directory where the files are saved; change to your own path. A variable is
# used instead of setwd() so the session's working directory is left untouched.
data_dir <- "C:/Users/your_path_here/Desktop"

# `pattern` is a regular expression: escape the dot, anchor at the end of the
# name, and ignore case so that both .txt and .TXT files are found.
file_names <- list.files(data_dir, pattern = "\\.txt$", ignore.case = TRUE,
                         full.names = TRUE)
# Skip the combined output written at the end of this script, so that running
# the script a second time does not read its own result back in.
file_names <- file_names[basename(file_names) != "mydata.txt"]
stopifnot(length(file_names) > 0)
# print file_names vector
file_names

# Read the first file on its own, just for testing, and see the data structure.
# sep = "" splits on any run of whitespace, matching the sample data shown.
first_file <- read.csv(file_names[1], header = FALSE, sep = "",
                       stringsAsFactors = FALSE)
str(first_file)

# run read.csv on all values of file_names and stack the results
files <- lapply(file_names, read.csv, header = FALSE, sep = "",
                stringsAsFactors = FALSE)
files <- do.call(rbind, files)

# Set column names from the actual column count (field1, field2, ...), so the
# assignment cannot fail when the files do not have exactly five columns.
names(files) <- paste0("field", seq_along(files))
str(files)

write.table(files, file.path(data_dir, "mydata.txt"), sep = "\t")
write.csv(files, file.path(data_dir, "mydata.csv"))
我想从我的目录中读取多个文本文件,这些文件按以下格式排列
regional_vol_GM_atlas1.txt
regional_vol_GM_atlas2.txt
........
regional_vol_GM_atlas152.txt
文件中的数据采用以下格式
667869 667869
580083 580083
316133 316133
3631 3631
以下是我写的脚本
# Packages attached by the asker; the lines below only call dir(), list.files(),
# read.table() and cbind(), none of which come from these packages.
library(readr)
library(stringr)
library(data.table)
# Accumulator: each loop iteration appends the extracted row with cbind().
array <- c()
# NOTE(review): the directory path below is not wrapped in quotes, so it is not a
# character string and R cannot parse this line as written.
for (file in dir(/media/dev/Daten/Task1/subject1/t1)) # path to the directory where .txt files are located
{
# NOTE(review): `file=` is given the whole vector returned by list.files() instead of
# the loop variable `file`; read.table() expects a single file name, which is
# presumably what triggers the reported "invalid 'description' argument" error.
# list.files() is also called without a path, so it searches the working directory,
# and `pattern` is interpreted as a regular expression rather than a shell glob.
row4 <- read.table(file=list.files(pattern ="regional_vol*.txt"),
header = FALSE,
row.names = NULL,
skip = 3, # Skip the 1st 3 rows
nrows = 1, # Read only the next row after skipping the 1st 3 rows
sep = "\t") # change the separator if it is not "\t"
# Append this iteration's row to the accumulator as additional columns.
array <- cbind(array, row4)
}
我遇到了以下错误
Error in file(file, "rt") : invalid 'description' argument
请指出我在脚本中的错误之处
这对我来说似乎很好用。如果文件有 headers,请根据代码注释进行更改: [已编辑答案以反映 OP 发布的新信息]
# Collect line 4 of every regional_vol*.txt file in `mydir` into one data frame.
# rm(list=ls()) #clean memory if you can afford to
mydir <- "~/Desktop/a" # change as per your path

# Read full paths. `pattern` is a regular expression (not a shell glob), so the
# dot is escaped and the expression is anchored at both ends.
myfiles <- list.files(mydir, pattern = "^regional_vol.*\\.txt$", full.names = TRUE)
# Fail early with a clear message instead of a cryptic read error on NA.
if (length(myfiles) == 0) {
  stop("no regional_vol*.txt files found in ", mydir, call. = FALSE)
}
myfiles # check that files listed correctly
# Note: list.files() sorts alphabetically, so atlas10 is listed before atlas2.

# Read the first four lines of one file and keep only the fourth.
# Change header = TRUE/FALSE depending on presence of a header, and make sure
# sep is correct ("" means any run of whitespace).
# drop = FALSE keeps the result a data frame even for single-column files.
read_row4 <- function(path) {
  read.csv(path, header = FALSE, skip = 0, nrows = 4, sep = "")[-(1:3), , drop = FALSE]
}

# initialise the dataframe from first file
df <- read_row4(myfiles[1])
# check that first line was read correctly
df

# read the same line from all the other files
ans <- lapply(myfiles[-1], read_row4)
ans

# Bind everything in a single rbind() call; this avoids `<<-` and growing the
# data frame one row at a time.
df <- do.call(rbind, c(list(df), ans))
# this should be the required dataframe
df
此外,如果您使用 Linux,一个更简单的方法就是让 OS 为您完成
# FNR is awk's per-file line counter, so this prints line 4 of every matching file.
awk 'FNR == 4' regional_vol*.txt
这应该可以为您完成。
# Combine every .txt file in one directory into a single data frame and save it.

# Directory where the files are saved; change to your own path. A variable is
# used instead of setwd() so the session's working directory is left untouched.
data_dir <- "C:/Users/your_path_here/Desktop"

# `pattern` is a regular expression: escape the dot, anchor at the end of the
# name, and ignore case so that both .txt and .TXT files are found.
file_names <- list.files(data_dir, pattern = "\\.txt$", ignore.case = TRUE,
                         full.names = TRUE)
# Skip the combined output written at the end of this script, so that running
# the script a second time does not read its own result back in.
file_names <- file_names[basename(file_names) != "mydata.txt"]
stopifnot(length(file_names) > 0)
# print file_names vector
file_names

# Read the first file on its own, just for testing, and see the data structure.
# sep = "" splits on any run of whitespace, matching the sample data shown.
first_file <- read.csv(file_names[1], header = FALSE, sep = "",
                       stringsAsFactors = FALSE)
str(first_file)

# run read.csv on all values of file_names and stack the results
files <- lapply(file_names, read.csv, header = FALSE, sep = "",
                stringsAsFactors = FALSE)
files <- do.call(rbind, files)

# Set column names from the actual column count (field1, field2, ...), so the
# assignment cannot fail when the files do not have exactly five columns.
names(files) <- paste0("field", seq_along(files))
str(files)

write.table(files, file.path(data_dir, "mydata.txt"), sep = "\t")
write.csv(files, file.path(data_dir, "mydata.csv"))