读取多个输入文件并为每个输入文件写入多个输出文件的脚本?
Script to read multiple input files and write a separate output file for each input file?
我有多个如下格式的输入文件需要处理。
输入文件路径/tmp/input
.
1.1.1.txt
1.1.2.txt
1.1.3.txt
但是,我想在另一个文件夹(假设为 /tmp/outputsmgr)中为每个输入文件生成输出文件,如下所示:
1.1.1_output.csv
1.1.2_output.csv
1.1.3_output.csv
问题是:
- 首先,我无法将输出文件写入另一个(不同的)文件夹
- 其次,所有输入文件数据在处理后合并到输入文件夹中的一个文件中,如下所示,而不是每个输入文件单独的输出文件
下面列出的所有文件都包含相同的数据。文件 1.1.1.txt 的数据应在文件 1.1.1_output.csv 中,文件 1.1.2.txt 的数据应在文件 1.1.2_output.csv 中。
1.1.1.txt_output.csv
1.1.2.txt_output.csv
1.1.3.txt_output.csv
如何修改以下代码以获得所需的结果?
import os
import csv
import re
def parseFile(fileName):
    """Parse one input file and return its "sessmgr" rows as dictionaries.

    The first line of the file is skipped. Every remaining line that
    matches the regex "sessmgr" is split on whitespace and its first eleven
    fields are stored under fixed column names; any further fields on the
    line are ignored.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list with one dict per usable matching line, in file order. The
        list is empty when the file is empty or nothing matches.
    """
    columns = (
        "CPU",
        "facility",
        "instance",
        "cpu-used",
        "cpu-allc",
        "mem-used",
        "mem-allc",
        "files-used",
        "files-allc",
        "sessions-used",
        "sessions-allc",
    )
    data = []
    # The context manager closes the handle even if parsing raises; the
    # previous bare open() never closed it.
    with open(fileName, "r") as fh:
        # Skip the first line; the default keeps an empty file from raising.
        next(fh, None)
        for line in fh:
            if not re.search("sessmgr", line):
                continue
            splitted = line.split()
            if len(splitted) < len(columns):
                # A matching line with too few fields used to abort the
                # whole run with IndexError; skip it instead.
                continue
            # zip() stops after the eleven named columns.
            data.append(dict(zip(columns, splitted)))
    return data
# Script entry point of the code the question asks about. The code lines are
# kept exactly as posted (the indentation was lost when the page was
# extracted); the comments mark where the reported symptoms come from.
if __name__ == "__main__":
inputsDirectory = "/tmp/input"
outputDirectory = "/tmp/outputsmgr"
path = os.path.abspath(inputsDirectory)
# pathout is computed here but never referenced again in this script.
pathout = os.path.abspath(outputDirectory)
# fileLists joins the names found in the OUTPUT directory onto the INPUT path
# and is never used afterwards.
fileLists = ["{0}/{1}".format(path,x) for x in os.listdir(outputDirectory)]
# Full paths of every entry in the input directory.
fileList = ["{0}/{1}".format(path,x) for x in os.listdir(inputsDirectory)]
# print(fileList)
# Every input file is parsed up front and all results are collected in the
# single csvRows list (one inner list of row dicts per input file).
csvRows = []
for file in fileList:
newRow = parseFile(file)
csvRows.append(newRow)
# print(csvRows)
for files in fileList:
outputFile = "output.csv"
# The output name is the full INPUT path plus "_output.csv", so the file is
# created inside the input directory and keeps the ".txt" part of the name.
# os.path.join() with a single argument returns it unchanged.
# The handle opened here is never closed in this script.
csvfile = open(os.path.join(files + "_" + outputFile), 'w')
fieldnames = ["CPU",
"facility",
"instance",
"cpu-used",
"cpu-allc",
"mem-used",
"mem-allc",
"files-used",
"files-allc",
"sessions-used",
"sessions-allc"]
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
# Writes the header row by mapping each field name to itself.
headers = {}
for n in writer.fieldnames:
headers[n] = n
writer.writerow(headers)
# writer.writeheader()
# NOTE(review): presumably this loop is nested inside "for files in fileList"
# (indentation lost). It writes the rows of EVERY parsed input file, which
# matches the report that all output files contain the same data.
for row in csvRows:
for obj in row:
print (obj)
writer.writerow(obj)
我认为下面的代码可以满足您的要求。它按顺序处理输入目录中的文件,并将 parseFile() 函数返回的结果写入输出目录中相应的输出文件。关键在于:每处理一个输入文件都要重新获取一组行数据,并且只把这一组数据写入与该输入文件对应的输出文件。
代码假定 outputDirectory 已经存在;如果不是这种情况,则需要添加代码,在处理任何文件之前先创建该目录。提示:将 os.path.exists() 和 os.path.isdir() 与 os.makedirs() 结合使用。
import csv
import os
import re
def parseFile(filePath, fieldnames, target_re=r"sessmgr"):
    """Yield one dict per line of a file that matches a regex.

    The first line of the file is skipped. Each later line in which
    ``target_re`` is found is split on whitespace and the fields are paired
    with ``fieldnames`` in order; pairing stops at the shorter of the two.

    Args:
        filePath: Path of the text file to read.
        fieldnames: Sequence of column names used as dictionary keys.
        target_re: Regular expression a line must contain to be yielded.

    Yields:
        A dict mapping field names to the whitespace-separated values of
        each matching line. Nothing is yielded for an empty file.
    """
    # Compile once instead of handing the pattern to re.search() per line.
    pattern = re.compile(target_re)
    with open(filePath, "r") as infile:
        # Skip the first line. The default matters: a bare next() on an
        # empty file raises StopIteration, which inside a generator is
        # turned into RuntimeError (PEP 479).
        next(infile, None)
        for line in infile:
            if pattern.search(line):
                yield dict(zip(fieldnames, line.split()))
# Script entry point: convert every file in the input directory into its own
# "<name>_output.csv" file in the output directory.
if __name__ == "__main__":
    OUTPUT_FILE_SUFFIX = "output.csv"
    inputsDirectory = "/tmp/input"
    outputDirectory = "/tmp/outputsmgr"
    fieldnames = ("CPU", "facility", "instance", "cpu-used", "cpu-allc", "mem-used",
                  "mem-allc", "files-used", "files-allc", "sessions-used",
                  "sessions-allc")

    input_dir = os.path.abspath(inputsDirectory)
    # The output directory is not created here; it must already exist.
    output_dir = os.path.abspath(outputDirectory)

    # sorted() makes the processing order independent of the file system.
    for in_filename in sorted(os.listdir(input_dir)):
        in_filepath = os.path.join(input_dir, in_filename)
        if not os.path.isfile(in_filepath):
            # os.listdir() also returns sub-directories, which cannot be
            # opened as text files.
            continue
        print('in_filepath: "{}"'.format(in_filepath))

        # Drop only the last extension: "1.1.1.txt" -> "1.1.1".
        in_rootname = os.path.splitext(in_filename)[0]
        out_filename = in_rootname + "_" + OUTPUT_FILE_SUFFIX
        out_filepath = os.path.join(output_dir, out_filename)
        print('out_filepath: "{}"'.format(out_filepath))

        # The csv module asks for newline="" so that it controls the line
        # endings itself; without it, platforms that translate "\n" on write
        # end up with an extra "\r" per row.
        with open(out_filepath, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            # Rows come from this input file only, so each output file
            # holds just its own data.
            writer.writerows(parseFile(in_filepath, fieldnames))
我有多个如下格式的输入文件需要处理。
输入文件路径/tmp/input
.
1.1.1.txt
1.1.2.txt
1.1.3.txt
但是,我想在另一个文件夹(假设为 /tmp/outputsmgr)中为每个输入文件生成输出文件,如下所示:
1.1.1_output.csv
1.1.2_output.csv
1.1.3_output.csv
问题是:
- 首先,我无法将输出文件写入另一个(不同的)文件夹
- 其次,所有输入文件数据在处理后合并到输入文件夹中的一个文件中,如下所示,而不是每个输入文件单独的输出文件
下面列出的所有文件都包含相同的数据。文件 1.1.1.txt 的数据应在文件 1.1.1_output.csv 中,文件 1.1.2.txt 的数据应在文件 1.1.2_output.csv 中。
1.1.1.txt_output.csv
1.1.2.txt_output.csv
1.1.3.txt_output.csv
如何修改以下代码以获得所需的结果?
import os
import csv
import re
def parseFile(fileName):
    """Parse one input file and return its "sessmgr" rows as dictionaries.

    The first line of the file is skipped. Every remaining line that
    matches the regex "sessmgr" is split on whitespace and its first eleven
    fields are stored under fixed column names; any further fields on the
    line are ignored.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list with one dict per usable matching line, in file order. The
        list is empty when the file is empty or nothing matches.
    """
    columns = (
        "CPU",
        "facility",
        "instance",
        "cpu-used",
        "cpu-allc",
        "mem-used",
        "mem-allc",
        "files-used",
        "files-allc",
        "sessions-used",
        "sessions-allc",
    )
    data = []
    # The context manager closes the handle even if parsing raises; the
    # previous bare open() never closed it.
    with open(fileName, "r") as fh:
        # Skip the first line; the default keeps an empty file from raising.
        next(fh, None)
        for line in fh:
            if not re.search("sessmgr", line):
                continue
            splitted = line.split()
            if len(splitted) < len(columns):
                # A matching line with too few fields used to abort the
                # whole run with IndexError; skip it instead.
                continue
            # zip() stops after the eleven named columns.
            data.append(dict(zip(columns, splitted)))
    return data
# Script entry point of the code the question asks about. The code lines are
# kept exactly as posted (the indentation was lost when the page was
# extracted); the comments mark where the reported symptoms come from.
if __name__ == "__main__":
inputsDirectory = "/tmp/input"
outputDirectory = "/tmp/outputsmgr"
path = os.path.abspath(inputsDirectory)
# pathout is computed here but never referenced again in this script.
pathout = os.path.abspath(outputDirectory)
# fileLists joins the names found in the OUTPUT directory onto the INPUT path
# and is never used afterwards.
fileLists = ["{0}/{1}".format(path,x) for x in os.listdir(outputDirectory)]
# Full paths of every entry in the input directory.
fileList = ["{0}/{1}".format(path,x) for x in os.listdir(inputsDirectory)]
# print(fileList)
# Every input file is parsed up front and all results are collected in the
# single csvRows list (one inner list of row dicts per input file).
csvRows = []
for file in fileList:
newRow = parseFile(file)
csvRows.append(newRow)
# print(csvRows)
for files in fileList:
outputFile = "output.csv"
# The output name is the full INPUT path plus "_output.csv", so the file is
# created inside the input directory and keeps the ".txt" part of the name.
# os.path.join() with a single argument returns it unchanged.
# The handle opened here is never closed in this script.
csvfile = open(os.path.join(files + "_" + outputFile), 'w')
fieldnames = ["CPU",
"facility",
"instance",
"cpu-used",
"cpu-allc",
"mem-used",
"mem-allc",
"files-used",
"files-allc",
"sessions-used",
"sessions-allc"]
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
# Writes the header row by mapping each field name to itself.
headers = {}
for n in writer.fieldnames:
headers[n] = n
writer.writerow(headers)
# writer.writeheader()
# NOTE(review): presumably this loop is nested inside "for files in fileList"
# (indentation lost). It writes the rows of EVERY parsed input file, which
# matches the report that all output files contain the same data.
for row in csvRows:
for obj in row:
print (obj)
writer.writerow(obj)
我认为下面的代码可以满足您的要求。它按顺序处理输入目录中的文件,并将 parseFile() 函数返回的结果写入输出目录中相应的输出文件。关键在于:每处理一个输入文件都要重新获取一组行数据,并且只把这一组数据写入与该输入文件对应的输出文件。
代码假定 outputDirectory 已经存在;如果不是这种情况,则需要添加代码,在处理任何文件之前先创建该目录。提示:将 os.path.exists() 和 os.path.isdir() 与 os.makedirs() 结合使用。
import csv
import os
import re
def parseFile(filePath, fieldnames, target_re=r"sessmgr"):
    """Yield one dict per line of a file that matches a regex.

    The first line of the file is skipped. Each later line in which
    ``target_re`` is found is split on whitespace and the fields are paired
    with ``fieldnames`` in order; pairing stops at the shorter of the two.

    Args:
        filePath: Path of the text file to read.
        fieldnames: Sequence of column names used as dictionary keys.
        target_re: Regular expression a line must contain to be yielded.

    Yields:
        A dict mapping field names to the whitespace-separated values of
        each matching line. Nothing is yielded for an empty file.
    """
    # Compile once instead of handing the pattern to re.search() per line.
    pattern = re.compile(target_re)
    with open(filePath, "r") as infile:
        # Skip the first line. The default matters: a bare next() on an
        # empty file raises StopIteration, which inside a generator is
        # turned into RuntimeError (PEP 479).
        next(infile, None)
        for line in infile:
            if pattern.search(line):
                yield dict(zip(fieldnames, line.split()))
# Script entry point: convert every file in the input directory into its own
# "<name>_output.csv" file in the output directory.
if __name__ == "__main__":
    OUTPUT_FILE_SUFFIX = "output.csv"
    inputsDirectory = "/tmp/input"
    outputDirectory = "/tmp/outputsmgr"
    fieldnames = ("CPU", "facility", "instance", "cpu-used", "cpu-allc", "mem-used",
                  "mem-allc", "files-used", "files-allc", "sessions-used",
                  "sessions-allc")

    input_dir = os.path.abspath(inputsDirectory)
    # The output directory is not created here; it must already exist.
    output_dir = os.path.abspath(outputDirectory)

    # sorted() makes the processing order independent of the file system.
    for in_filename in sorted(os.listdir(input_dir)):
        in_filepath = os.path.join(input_dir, in_filename)
        if not os.path.isfile(in_filepath):
            # os.listdir() also returns sub-directories, which cannot be
            # opened as text files.
            continue
        print('in_filepath: "{}"'.format(in_filepath))

        # Drop only the last extension: "1.1.1.txt" -> "1.1.1".
        in_rootname = os.path.splitext(in_filename)[0]
        out_filename = in_rootname + "_" + OUTPUT_FILE_SUFFIX
        out_filepath = os.path.join(output_dir, out_filename)
        print('out_filepath: "{}"'.format(out_filepath))

        # The csv module asks for newline="" so that it controls the line
        # endings itself; without it, platforms that translate "\n" on write
        # end up with an extra "\r" per row.
        with open(out_filepath, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            # Rows come from this input file only, so each output file
            # holds just its own data.
            writer.writerows(parseFile(in_filepath, fieldnames))