重命名 Python 中一个文件夹中的多个 csv 文件
Renaming multiple csv files within a folder in Python
我有一个包含 50 个 .csv 文件的文件夹。 .csv 文件是自动生成的,是基于过程的模型的结果/输出(长且自动命名)。例如,sandbox_username_vetch_scaleup_IA_1.csv; sandbox_username_vetch_scaleup_IA_2.csv,一直持续到 sandbox_username_vetch_scaleup_IA_50.csv。
我正在尝试以某种方式缩短文件名,以便文件名是 IA_1、IA_2 ...直到 IA_50 以及随后的新 .csv文件名作为列添加到数据框中。这是我到目前为止尝试过的
# import necessary libraries
import pandas as pd
import os
import glob
import sys
from pathlib import Path
import re
data_p = "/Users/Username/Documents/HV_Scale/CWAD"
output_p = "/Users/Username/Documents/HV_Scale/CWAD"
retval = os.getcwd()
print (retval) # see in which folder you are
os.chdir(data_p) # move to the folder with your data
os.getcwd()
filenames = sorted(glob.glob('*.csv'))
fnames = list(filenames) # get the names of all your files
#print(fnames)
#Loop over
for f in range(len(fnames)):
print(f'fname: {fnames[f]}\n')
pfile = pd.read_csv(fnames[f], delimiter=",") # read in file
#extract filename
filename = fnames[f]
parts = filename.split(".") # giving you the number in file name and .csv
only_id = parts[0].split("_") # if there is a bracket included
# get IA from your file
filestate = pfile["IA"][0] # assuming this is on the first row
filestate = str(filestate)
# get new filename
newfilename = only_id[0]+"-"+filestate+parts[1]
# save your file (don't put a slash at the end of your directories on top)
pfile.to_csv(output_p+"/"+newfilename, index = False, header = True)
这是将 csv 文件名添加为列的代码
import glob
import os
import shutil
import sys
import pandas as pd
path = '/Users/Username/Documents/HV_Scale/IA_CWAD/short'
all_files = glob.glob(os.path.join(path, "*.csv"))
names = [os.path.basename(x) for x in glob.glob(path+'\*.csv')]
df = pd.DataFrame()
for file_ in all_files:
file_df = pd.read_csv(file_,sep=';', parse_dates=[0], infer_datetime_format=True,header=None )
file_df['file_name'] = file_
df = df.append(file_df)
#但是,这会添加旧的 csv 文件名而不是重命名的文件名
要重命名和移动这些文件,您只需要:
import glob
import os
import shutil
import sys
SOURCE = '<Your source directory>'
TARGET = '<Your target directory>'
for file in glob.glob(os.path.join(SOURCE, '*_IA_*.csv')):
idx = file.index('_IA_')
filename = file[idx+1:]
target = os.path.join(TARGET, filename)
if os.path.exists(target):
print(f'Target file {target} already exists', file=sys.stderr)
else:
shutil.copy(file, target)
由于 OP 的问题中没有任何内容试图处理 CSV 文件的修改,因此留作 OP 的练习。
源目录和目标目录应该不同,否则会导致结果不明确
我有一个包含 50 个 .csv 文件的文件夹。 .csv 文件是自动生成的,是基于过程的模型的结果/输出(长且自动命名)。例如,sandbox_username_vetch_scaleup_IA_1.csv; sandbox_username_vetch_scaleup_IA_2.csv,一直持续到 sandbox_username_vetch_scaleup_IA_50.csv。
我正在尝试以某种方式缩短文件名,以便文件名是 IA_1、IA_2 ...直到 IA_50 以及随后的新 .csv文件名作为列添加到数据框中。这是我到目前为止尝试过的
# import necessary libraries
import pandas as pd
import os
import glob
import sys
from pathlib import Path
import re
data_p = "/Users/Username/Documents/HV_Scale/CWAD"
output_p = "/Users/Username/Documents/HV_Scale/CWAD"
retval = os.getcwd()
print (retval) # see in which folder you are
os.chdir(data_p) # move to the folder with your data
os.getcwd()
filenames = sorted(glob.glob('*.csv'))
fnames = list(filenames) # get the names of all your files
#print(fnames)
#Loop over
for f in range(len(fnames)):
print(f'fname: {fnames[f]}\n')
pfile = pd.read_csv(fnames[f], delimiter=",") # read in file
#extract filename
filename = fnames[f]
parts = filename.split(".") # giving you the number in file name and .csv
only_id = parts[0].split("_") # if there is a bracket included
# get IA from your file
filestate = pfile["IA"][0] # assuming this is on the first row
filestate = str(filestate)
# get new filename
newfilename = only_id[0]+"-"+filestate+parts[1]
# save your file (don't put a slash at the end of your directories on top)
pfile.to_csv(output_p+"/"+newfilename, index = False, header = True)
这是将 csv 文件名添加为列的代码
import glob
import os
import shutil
import sys
import pandas as pd
path = '/Users/Username/Documents/HV_Scale/IA_CWAD/short'
all_files = glob.glob(os.path.join(path, "*.csv"))
names = [os.path.basename(x) for x in glob.glob(path+'\*.csv')]
df = pd.DataFrame()
for file_ in all_files:
file_df = pd.read_csv(file_,sep=';', parse_dates=[0], infer_datetime_format=True,header=None )
file_df['file_name'] = file_
df = df.append(file_df)
#但是,这会添加旧的 csv 文件名而不是重命名的文件名
要重命名和移动这些文件,您只需要:
import glob
import os
import shutil
import sys
SOURCE = '<Your source directory>'
TARGET = '<Your target directory>'
for file in glob.glob(os.path.join(SOURCE, '*_IA_*.csv')):
idx = file.index('_IA_')
filename = file[idx+1:]
target = os.path.join(TARGET, filename)
if os.path.exists(target):
print(f'Target file {target} already exists', file=sys.stderr)
else:
shutil.copy(file, target)
由于 OP 的问题中没有任何内容试图处理 CSV 文件的修改,因此留作 OP 的练习。
源目录和目标目录应该不同,否则会导致结果不明确