R - 尝试遍历文件夹和子文件夹,并将文件信息保存到 table
R - Trying to loop over a folder and subfolders, and save files info into a table
我正在尝试遍历一个文件夹及其所有子文件夹,并将每个文件的信息(大小、修改时间 mtime 等)保存到一个数据框 table 中。
我写了下面的代码,但结果与预期不符:每一行的文件名都显示为 'folder/filename'(带有子文件夹前缀),而文件夹一列始终是主文件夹('main folder'),而不是文件实际所在的子文件夹。
有人可以帮助我吗?
谢谢。
########################################################
########################################################
# Example: #
# The path 'c:/test' contains 1 file and 3 subfolders #
# with 2 files in each -> 7 files in total #
#
# c:/test
# file_000.txt
#
# + subfolder1
# + file_001.docx
# + file_002.txt
# + subfolder2
# + file_003.docx
# + file_004.bmp
# + subfolder3
# + file_005.xlsx
# + file_006.txt
########################################################
# Purpose: walk 'c:/test' and its subfolders and collect file.info() for every
# file into the data frame 'table', with extra 'file' and 'folder' columns.
# This is the asker's original attempt; the notes below mark where it goes wrong.
# Create the empty data frame 'table' that the loops append to
table = data.frame()
# Root folder to scan
path = 'c:/test'
# List the folders to visit (recursive = TRUE also descends into subfolders)
folders_list = list.dirs(path, recursive = TRUE)
# Outer loop: one pass per folder
for(folder in folders_list){
# Change the working directory to the current folder so that the relative
# file names returned below resolve for file.info(); the working directory
# is not restored afterwards.
setwd(folder)
dir = getwd()
# Get the files list for this folder.
# NOTE: recursive = TRUE also returns files from subfolders, as relative paths
# such as 'subfolder1/file_001.docx' (see the row names in RESULT below), even
# though the outer loop visits each subfolder separately as well.
files_list = list.files(folder, recursive = TRUE)
# Inner loop: one pass per file
for(file in files_list){
# Get info about each file and append it below the previous row of 'table'.
# NOTE(review): from the second folder on, 'table' already carries the extra
# 'folder' column while file.info() does not, so this rbind() presumably stops
# with a column-count mismatch - consistent with every RESULT row showing
# c:/test. Confirm by running.
table = rbind(table, file.info(file))
}
# Intended: add a column with the file name to each row of 'table'.
# NOTE: 'file' is the loop variable (a character string), not the data frame,
# so row.names(file) is not the vector of file names; RESULT below shows that
# no 'file' column appears. row.names(table) was probably meant.
table$file = row.names(file)
# Intended: add the folder path in another column 'folder'.
# NOTE: this assigns the single current folder to ALL rows collected so far,
# not only to the rows added in this pass.
table$folder = dir
}
# Show 'table' in the data viewer
View(table)
##############################################
# RESULT (head(table)):
# size isdir mode mtime ctime atime exe folder
# file_000.txt 0 FALSE 666 2016-10-27 17:19:40 2016-10-27 17:19:40 2016-10-27 17:19:40 no c:/test
# subfolder1/file_001.docx 0 FALSE 666 2016-10-27 17:57:00 2016-10-27 17:57:00 2016-10-27 17:57:00 no c:/test
# subfolder1/file_002.txt 0 FALSE 666 2016-10-27 17:18:34 2016-10-27 17:18:34 2016-10-27 17:18:34 no c:/test
# subfolder2/file_003.docx 0 FALSE 666 2016-10-27 17:57:29 2016-10-27 17:57:29 2016-10-27 17:57:29 no c:/test
# subfolder2/file_004.bmp 0 FALSE 666 2016-10-27 17:19:00 2016-10-27 17:19:00 2016-10-27 17:19:00 no c:/test
# subfolder3/file_005.xlsx 8081 FALSE 666 2016-10-27 17:57:52 2016-10-27 17:57:52 2016-10-27 17:57:52 no c:/test
##############################################
##############################################
# NEEDED:
# size isdir mode mtime ctime atime exe folder
# file_000.txt 0 FALSE 666 2016-10-27 17:19:40 2016-10-27 17:19:40 2016-10-27 17:19:40 no c:/test
# file_001.docx 0 FALSE 666 2016-10-27 17:57:00 2016-10-27 17:57:00 2016-10-27 17:57:00 no c:/test/subfolder1
# file_002.txt 0 FALSE 666 2016-10-27 17:18:34 2016-10-27 17:18:34 2016-10-27 17:18:34 no c:/test/subfolder1
# file_003.docx 0 FALSE 666 2016-10-27 17:57:29 2016-10-27 17:57:29 2016-10-27 17:57:29 no c:/test/subfolder2
# file_004.bmp 0 FALSE 666 2016-10-27 17:19:00 2016-10-27 17:19:00 2016-10-27 17:19:00 no c:/test/subfolder2
# file_005.xlsx 8081 FALSE 666 2016-10-27 17:57:52 2016-10-27 17:57:52 2016-10-27 17:57:52 no c:/test/subfolder3
##############################################
像这样的东西应该可以工作:
# Build one data frame with file.info() for every file below "test",
# plus the bare file name ('file') and its containing folder ('folder').
#
# List all files in "test" and its subfolders in a single call;
# full.names = TRUE keeps the folder prefix so it can be split off below.
files <- list.files("test", full.names = TRUE, recursive = TRUE)
# file.info() is vectorised: one call returns one row per path (row names are
# the paths). It also returns a proper zero-row data frame when no files are
# found, whereas do.call(rbind, <empty list>) would give NULL.
table <- file.info(files)
# Split each path with basename()/dirname() rather than a regex: in an R
# string literal "\1" is the octal escape for the character with code 1, not a
# backreference (that would have to be written "\\1"), so a gsub() using it
# would not insert the captured group.
table$file <- basename(files)
table$folder <- dirname(files)
我正在尝试遍历一个文件夹及其所有子文件夹,并将每个文件的信息(大小、修改时间 mtime 等)保存到一个数据框 table 中。 我写了下面的代码,但结果与预期不符:每一行的文件名都显示为 'folder/filename'(带有子文件夹前缀),而文件夹一列始终是主文件夹('main folder'),而不是文件实际所在的子文件夹。 有人可以帮助我吗? 谢谢。
########################################################
########################################################
# Example: #
# The path 'c:/test' contains 1 file and 3 subfolders #
# with 2 files in each -> 7 files in total #
#
# c:/test
# file_000.txt
#
# + subfolder1
# + file_001.docx
# + file_002.txt
# + subfolder2
# + file_003.docx
# + file_004.bmp
# + subfolder3
# + file_005.xlsx
# + file_006.txt
########################################################
# Purpose: walk 'c:/test' and its subfolders and collect file.info() for every
# file into the data frame 'table', with extra 'file' and 'folder' columns.
# This is the asker's original attempt; the notes below mark where it goes wrong.
# Create the empty data frame 'table' that the loops append to
table = data.frame()
# Root folder to scan
path = 'c:/test'
# List the folders to visit (recursive = TRUE also descends into subfolders)
folders_list = list.dirs(path, recursive = TRUE)
# Outer loop: one pass per folder
for(folder in folders_list){
# Change the working directory to the current folder so that the relative
# file names returned below resolve for file.info(); the working directory
# is not restored afterwards.
setwd(folder)
dir = getwd()
# Get the files list for this folder.
# NOTE: recursive = TRUE also returns files from subfolders, as relative paths
# such as 'subfolder1/file_001.docx' (see the row names in RESULT below), even
# though the outer loop visits each subfolder separately as well.
files_list = list.files(folder, recursive = TRUE)
# Inner loop: one pass per file
for(file in files_list){
# Get info about each file and append it below the previous row of 'table'.
# NOTE(review): from the second folder on, 'table' already carries the extra
# 'folder' column while file.info() does not, so this rbind() presumably stops
# with a column-count mismatch - consistent with every RESULT row showing
# c:/test. Confirm by running.
table = rbind(table, file.info(file))
}
# Intended: add a column with the file name to each row of 'table'.
# NOTE: 'file' is the loop variable (a character string), not the data frame,
# so row.names(file) is not the vector of file names; RESULT below shows that
# no 'file' column appears. row.names(table) was probably meant.
table$file = row.names(file)
# Intended: add the folder path in another column 'folder'.
# NOTE: this assigns the single current folder to ALL rows collected so far,
# not only to the rows added in this pass.
table$folder = dir
}
# Show 'table' in the data viewer
View(table)
##############################################
# RESULT (head(table)):
# size isdir mode mtime ctime atime exe folder
# file_000.txt 0 FALSE 666 2016-10-27 17:19:40 2016-10-27 17:19:40 2016-10-27 17:19:40 no c:/test
# subfolder1/file_001.docx 0 FALSE 666 2016-10-27 17:57:00 2016-10-27 17:57:00 2016-10-27 17:57:00 no c:/test
# subfolder1/file_002.txt 0 FALSE 666 2016-10-27 17:18:34 2016-10-27 17:18:34 2016-10-27 17:18:34 no c:/test
# subfolder2/file_003.docx 0 FALSE 666 2016-10-27 17:57:29 2016-10-27 17:57:29 2016-10-27 17:57:29 no c:/test
# subfolder2/file_004.bmp 0 FALSE 666 2016-10-27 17:19:00 2016-10-27 17:19:00 2016-10-27 17:19:00 no c:/test
# subfolder3/file_005.xlsx 8081 FALSE 666 2016-10-27 17:57:52 2016-10-27 17:57:52 2016-10-27 17:57:52 no c:/test
##############################################
##############################################
# NEEDED:
# size isdir mode mtime ctime atime exe folder
# file_000.txt 0 FALSE 666 2016-10-27 17:19:40 2016-10-27 17:19:40 2016-10-27 17:19:40 no c:/test
# file_001.docx 0 FALSE 666 2016-10-27 17:57:00 2016-10-27 17:57:00 2016-10-27 17:57:00 no c:/test/subfolder1
# file_002.txt 0 FALSE 666 2016-10-27 17:18:34 2016-10-27 17:18:34 2016-10-27 17:18:34 no c:/test/subfolder1
# file_003.docx 0 FALSE 666 2016-10-27 17:57:29 2016-10-27 17:57:29 2016-10-27 17:57:29 no c:/test/subfolder2
# file_004.bmp 0 FALSE 666 2016-10-27 17:19:00 2016-10-27 17:19:00 2016-10-27 17:19:00 no c:/test/subfolder2
# file_005.xlsx 8081 FALSE 666 2016-10-27 17:57:52 2016-10-27 17:57:52 2016-10-27 17:57:52 no c:/test/subfolder3
##############################################
像这样的东西应该可以工作:
# Build one data frame with file.info() for every file below "test",
# plus the bare file name ('file') and its containing folder ('folder').
#
# List all files in "test" and its subfolders in a single call;
# full.names = TRUE keeps the folder prefix so it can be split off below.
files <- list.files("test", full.names = TRUE, recursive = TRUE)
# file.info() is vectorised: one call returns one row per path (row names are
# the paths). It also returns a proper zero-row data frame when no files are
# found, whereas do.call(rbind, <empty list>) would give NULL.
table <- file.info(files)
# Split each path with basename()/dirname() rather than a regex: in an R
# string literal "\1" is the octal escape for the character with code 1, not a
# backreference (that would have to be written "\\1"), so a gsub() using it
# would not insert the captured group.
table$file <- basename(files)
table$folder <- dirname(files)