将数据从文本文件保存到 python 中的 CSV 文件,分隔符为 space
saving data from text file to CSV file in python having delimiter as space
我有一个巨大的报告文件。我从中提取了所需的数据到一个名为“new.txt”的新文件中
我想将这些数据保存到 CSV 文件中,使文件中定义的每个表头(header)都能对应到正确的列和行。
但我无法得到像在 Excel 中那样的正确格式。我使用的是 Python 2.7,并且希望在不使用 pandas 包的情况下完成此操作。
SIMPLE_FILE REPORT:
CALL Alias Severity File Line Wt Message
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 785 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1111 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1226 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1354 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1363 1000 message for this block ( syn_ff ) is ignored by syn
这是我的代码。
import os
import sys
from os import path
import struct
# Output locations: the extracted report section and the final CSV.
csv_file = "report.csv"
outFile = "new.txt"

# Column layout shared with calclens(); both are filled in from the header row.
fs = None
reqlen = 0  # minimum length of row

# Open the extraction file for appending and empty it right away, so every
# run starts from a blank file while the handle stays open for later writes.
new_file = open(outFile, "a+")
new_file.truncate(0)
def calclens(line):  # calc column widths from header row
    """Derive the fixed column widths from the header row.

    A column starts at offset 0 and at every non-space character that
    directly follows a space.  Each column runs up to the start of the next
    one; the last column runs to the end of ``line`` (line terminator
    included), so the widths always add up to ``len(line)``.

    Unlike the previous implementation, the last header column is kept even
    when the header has no trailing space before its newline, and an empty
    header no longer raises ``IndexError``.

    Side effects: sets the module globals ``fs`` (a ``struct.Struct`` with
    one ``<width>s`` item per column) and ``reqlen`` (``len(line)``).

    Returns the compiled ``struct.Struct`` (callers may ignore it).
    """
    global fs, reqlen
    # The line terminator must not be mistaken for the start of a column.
    body = line.rstrip('\r\n')
    starts = [0]
    for pos in range(1, len(body)):
        if body[pos - 1] == ' ' and body[pos] != ' ':  # new column
            starts.append(pos)
    ends = starts[1:] + [len(line)]  # last column to end of line
    fieldwidths = [end - start for start, end in zip(starts, ends)]
    # Whitespace between items is ignored by struct, e.g. "16s 16s 10s".
    fmtstring = ' '.join('{}s'.format(fw) for fw in fieldwidths)
    fs = struct.Struct(fmtstring)
    reqlen = len(line)
    return fs
def open_file(filename):
    """Extract the report section of ``filename`` and convert it to CSV.

    Steps:
      1. Read ``filename``, strip every line and copy everything from the
         "MORESIMPLE_FILESORT REPORT:" marker line onwards into ``new_file``.
      2. Re-read "new.txt", skip the title line and every line starting with
         '+', compute the column layout from the header row with
         ``calclens`` and write each remaining row to ``csv_file`` as comma
         separated fields.

    Any exception is printed and the process exits with status 1.
    The syntax is valid on both Python 2.7 and Python 3.
    """
    try:
        with open(filename, 'r') as f1:
            contents = [line.strip() for line in f1]
        counter = contents.index("MORESIMPLE_FILESORT REPORT:")
        for item in contents[counter:]:
            new_file.write(item + "\n")
        # new_file is buffered and stays open; without a flush the reader
        # below may see an empty or truncated "new.txt".
        new_file.flush()
        with open("new.txt") as f:
            with open(csv_file, 'w') as f2:
                for i, line in enumerate(f):
                    if i == 0:
                        continue  # SIMPLE_FILE REPORT:
                    if line[0] == '+':
                        continue  # skip ++++ line
                    if i == 1:
                        calclens(line)  # header row, calc field positions\lengths
                    # Pad short rows so the buffer is at least reqlen long.
                    data = line.ljust(reqlen).encode()
                    fields = [raw.decode() for raw in fs.unpack_from(data)]
                    # Text beyond the header width belongs to the last column;
                    # unpack_from alone would silently cut it off.
                    fields[-1] += data[fs.size:].decode()
                    fields = [field.strip() for field in fields]  # trim all fields
                    f2.write(','.join(fields) + '\n')  # join fields comma separated
    except Exception as e:
        print(str(e))
        sys.exit(1)
if __name__ == '__main__':
    # The report path is the single required command-line argument; fail
    # with a usage message instead of an IndexError traceback when missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <report-file>".format(sys.argv[0]))
    name = filename = sys.argv[1]
    open_file(filename)
要拆分固定宽度(fixed-width)的文本文件,可以使用 struct 模块。
下面的代码根据 header 行来确定各列的位置和宽度,因此 header 行的长度必须与数据行的长度相同,并且各个 header 必须与其所在的列正确对齐。
此代码针对 Python 2.7 编写;在 Python 3 下需要做的改动已在代码中注明。
import struct
# Sample report used as test input: a title line, the header row, a '+'
# separator row and the data rows.  .strip() removes the blank first line
# left by the opening quotes, so the title ends up as line 0 of data.txt.
ss = '''
SIMPLE_FILE REPORT:
CALL Alias Severity File Line Wt Message
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 785 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1111 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1226 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1354 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1363 1000 message for this block ( syn_ff ) is ignored by syn
'''.strip()
with open ('data.txt','w') as f: f.write(ss) # write test file
##################### Main Script #######################
# Shared state, filled in by calclens() once the header row has been seen.
fs = None   # compiled struct.Struct describing the column layout
reqlen = 0  # minimum length of row
def getmaxlinelen(filename):  # scan file for longest line length
    """Return the length of the longest line in ``filename``.

    Lengths are those of the lines as yielded by iterating the file, i.e.
    they include the line terminator.  Returns 0 for an empty file.

    The file is streamed line by line instead of being loaded with
    ``readlines()``, so a huge report is never held in memory at once.
    """
    mx = 0
    with open(filename) as f:
        for ln in f:
            mx = max(mx, len(ln))
    return mx
def calclens(line, mxlen):  # calc column widths from header row
    """Derive the fixed column widths from the header row.

    A column starts at offset 0 and at every non-space character that
    directly follows a space.  Each column runs up to the start of the next
    one; the last column is stretched to ``mxlen`` (the longest line of the
    file) so that the longest data row fits completely.

    Unlike the previous implementation, the last header column is kept even
    when the header has no trailing space before its newline, and an empty
    header no longer raises ``IndexError``.

    Args:
        line: the header row, as read from the file.
        mxlen: length of the longest line in the file; values smaller than
            ``len(line)`` are raised to ``len(line)``.

    Side effects: sets the module globals ``fs`` (a ``struct.Struct`` with
    one ``<width>s`` item per column) and ``reqlen`` (the total row width).

    Returns the compiled ``struct.Struct`` (callers may ignore it).
    """
    global fs, reqlen
    # The line terminator must not be mistaken for the start of a column.
    body = line.rstrip('\r\n')
    starts = [0]
    for pos in range(1, len(body)):
        if body[pos - 1] == ' ' and body[pos] != ' ':  # new column
            starts.append(pos)
    total = max(mxlen, len(line))  # never narrower than the header itself
    ends = starts[1:] + [total]  # last column to end of longest data line
    fieldwidths = [end - start for start, end in zip(starts, ends)]
    # Whitespace between items is ignored by struct, e.g. "16s 16s 10s".
    fmtstring = ' '.join('{}s'.format(fw) for fw in fieldwidths)
    fs = struct.Struct(fmtstring)
    reqlen = total
    return fs
# Convert data.txt to data.csv using the column layout of the header row.
# The encode/decode round trip keeps this working on Python 2.7 and on
# Python 3, where struct only handles bytes.
mxlen = getmaxlinelen('data.txt')
with open('data.txt') as f:
    with open('data.csv', 'w') as f2:
        for i, line in enumerate(f):
            if i == 0:
                continue  # SIMPLE_FILE REPORT:
            if line[0] == '+':
                continue  # skip ++++ line
            if i == 1:
                calclens(line, mxlen)  # header row, calc field positions\lengths
            # line length must match longest length
            data = line.ljust(reqlen).encode()
            fields = [raw.decode() for raw in fs.unpack_from(data)]
            # trim all fields; commas are removed because fields are not quoted
            fields = [field.strip().replace(',', '') for field in fields]
            f2.write(','.join(fields) + '\n')  # join fields comma separated
输出(data.csv)
CALL,Alias,Severity,File,Line,Wt,Message
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,785,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1111,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1226,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1354,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1363,1000,message for this block ( syn_ff ) is ignored by syn
我有一个巨大的报告文件。我从中提取了所需的数据到一个名为“new.txt”的新文件中。我想将这些数据保存到 CSV 文件中,使文件中定义的每个表头(header)都能对应到正确的列和行。
但我无法得到像在 Excel 中那样的正确格式。我使用的是 Python 2.7,并且希望在不使用 pandas 包的情况下完成此操作。
SIMPLE_FILE REPORT:
CALL Alias Severity File Line Wt Message
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 785 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1111 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1226 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1354 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1363 1000 message for this block ( syn_ff ) is ignored by syn
这是我的代码。
import os
import sys
from os import path
import struct
# Output locations: the extracted report section and the final CSV.
csv_file = "report.csv"
outFile = "new.txt"

# Column layout shared with calclens(); both are filled in from the header row.
fs = None
reqlen = 0  # minimum length of row

# Open the extraction file for appending and empty it right away, so every
# run starts from a blank file while the handle stays open for later writes.
new_file = open(outFile, "a+")
new_file.truncate(0)
def calclens(line):  # calc column widths from header row
    """Derive the fixed column widths from the header row.

    A column starts at offset 0 and at every non-space character that
    directly follows a space.  Each column runs up to the start of the next
    one; the last column runs to the end of ``line`` (line terminator
    included), so the widths always add up to ``len(line)``.

    Unlike the previous implementation, the last header column is kept even
    when the header has no trailing space before its newline, and an empty
    header no longer raises ``IndexError``.

    Side effects: sets the module globals ``fs`` (a ``struct.Struct`` with
    one ``<width>s`` item per column) and ``reqlen`` (``len(line)``).

    Returns the compiled ``struct.Struct`` (callers may ignore it).
    """
    global fs, reqlen
    # The line terminator must not be mistaken for the start of a column.
    body = line.rstrip('\r\n')
    starts = [0]
    for pos in range(1, len(body)):
        if body[pos - 1] == ' ' and body[pos] != ' ':  # new column
            starts.append(pos)
    ends = starts[1:] + [len(line)]  # last column to end of line
    fieldwidths = [end - start for start, end in zip(starts, ends)]
    # Whitespace between items is ignored by struct, e.g. "16s 16s 10s".
    fmtstring = ' '.join('{}s'.format(fw) for fw in fieldwidths)
    fs = struct.Struct(fmtstring)
    reqlen = len(line)
    return fs
def open_file(filename):
    """Extract the report section of ``filename`` and convert it to CSV.

    Steps:
      1. Read ``filename``, strip every line and copy everything from the
         "MORESIMPLE_FILESORT REPORT:" marker line onwards into ``new_file``.
      2. Re-read "new.txt", skip the title line and every line starting with
         '+', compute the column layout from the header row with
         ``calclens`` and write each remaining row to ``csv_file`` as comma
         separated fields.

    Any exception is printed and the process exits with status 1.
    The syntax is valid on both Python 2.7 and Python 3.
    """
    try:
        with open(filename, 'r') as f1:
            contents = [line.strip() for line in f1]
        counter = contents.index("MORESIMPLE_FILESORT REPORT:")
        for item in contents[counter:]:
            new_file.write(item + "\n")
        # new_file is buffered and stays open; without a flush the reader
        # below may see an empty or truncated "new.txt".
        new_file.flush()
        with open("new.txt") as f:
            with open(csv_file, 'w') as f2:
                for i, line in enumerate(f):
                    if i == 0:
                        continue  # SIMPLE_FILE REPORT:
                    if line[0] == '+':
                        continue  # skip ++++ line
                    if i == 1:
                        calclens(line)  # header row, calc field positions\lengths
                    # Pad short rows so the buffer is at least reqlen long.
                    data = line.ljust(reqlen).encode()
                    fields = [raw.decode() for raw in fs.unpack_from(data)]
                    # Text beyond the header width belongs to the last column;
                    # unpack_from alone would silently cut it off.
                    fields[-1] += data[fs.size:].decode()
                    fields = [field.strip() for field in fields]  # trim all fields
                    f2.write(','.join(fields) + '\n')  # join fields comma separated
    except Exception as e:
        print(str(e))
        sys.exit(1)
if __name__ == '__main__':
    # The report path is the single required command-line argument; fail
    # with a usage message instead of an IndexError traceback when missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <report-file>".format(sys.argv[0]))
    name = filename = sys.argv[1]
    open_file(filename)
要拆分固定宽度(fixed-width)的文本文件,可以使用 struct 模块。
下面的代码根据 header 行来确定各列的位置和宽度,因此 header 行的长度必须与数据行的长度相同,并且各个 header 必须与其所在的列正确对齐。
此代码针对 Python 2.7 编写;在 Python 3 下需要做的改动已在代码中注明。
import struct
# Sample report used as test input: a title line, the header row, a '+'
# separator row and the data rows.  .strip() removes the blank first line
# left by the opening quotes, so the title ends up as line 0 of data.txt.
ss = '''
SIMPLE_FILE REPORT:
CALL Alias Severity File Line Wt Message
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 785 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1111 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1226 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1354 1000 message for this block ( syn_ff ) is ignored by syn
ACT_99 ACT_99 Warning /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png 1363 1000 message for this block ( syn_ff ) is ignored by syn
'''.strip()
with open ('data.txt','w') as f: f.write(ss) # write test file
##################### Main Script #######################
# Shared state, filled in by calclens() once the header row has been seen.
fs = None   # compiled struct.Struct describing the column layout
reqlen = 0  # minimum length of row
def getmaxlinelen(filename):  # scan file for longest line length
    """Return the length of the longest line in ``filename``.

    Lengths are those of the lines as yielded by iterating the file, i.e.
    they include the line terminator.  Returns 0 for an empty file.

    The file is streamed line by line instead of being loaded with
    ``readlines()``, so a huge report is never held in memory at once.
    """
    mx = 0
    with open(filename) as f:
        for ln in f:
            mx = max(mx, len(ln))
    return mx
def calclens(line, mxlen):  # calc column widths from header row
    """Derive the fixed column widths from the header row.

    A column starts at offset 0 and at every non-space character that
    directly follows a space.  Each column runs up to the start of the next
    one; the last column is stretched to ``mxlen`` (the longest line of the
    file) so that the longest data row fits completely.

    Unlike the previous implementation, the last header column is kept even
    when the header has no trailing space before its newline, and an empty
    header no longer raises ``IndexError``.

    Args:
        line: the header row, as read from the file.
        mxlen: length of the longest line in the file; values smaller than
            ``len(line)`` are raised to ``len(line)``.

    Side effects: sets the module globals ``fs`` (a ``struct.Struct`` with
    one ``<width>s`` item per column) and ``reqlen`` (the total row width).

    Returns the compiled ``struct.Struct`` (callers may ignore it).
    """
    global fs, reqlen
    # The line terminator must not be mistaken for the start of a column.
    body = line.rstrip('\r\n')
    starts = [0]
    for pos in range(1, len(body)):
        if body[pos - 1] == ' ' and body[pos] != ' ':  # new column
            starts.append(pos)
    total = max(mxlen, len(line))  # never narrower than the header itself
    ends = starts[1:] + [total]  # last column to end of longest data line
    fieldwidths = [end - start for start, end in zip(starts, ends)]
    # Whitespace between items is ignored by struct, e.g. "16s 16s 10s".
    fmtstring = ' '.join('{}s'.format(fw) for fw in fieldwidths)
    fs = struct.Struct(fmtstring)
    reqlen = total
    return fs
# Convert data.txt to data.csv using the column layout of the header row.
# The encode/decode round trip keeps this working on Python 2.7 and on
# Python 3, where struct only handles bytes.
mxlen = getmaxlinelen('data.txt')
with open('data.txt') as f:
    with open('data.csv', 'w') as f2:
        for i, line in enumerate(f):
            if i == 0:
                continue  # SIMPLE_FILE REPORT:
            if line[0] == '+':
                continue  # skip ++++ line
            if i == 1:
                calclens(line, mxlen)  # header row, calc field positions\lengths
            # line length must match longest length
            data = line.ljust(reqlen).encode()
            fields = [raw.decode() for raw in fs.unpack_from(data)]
            # trim all fields; commas are removed because fields are not quoted
            fields = [field.strip().replace(',', '') for field in fields]
            f2.write(','.join(fields) + '\n')  # join fields comma separated
输出(data.csv)
CALL,Alias,Severity,File,Line,Wt,Message
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,785,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1111,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1226,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1354,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1363,1000,message for this block ( syn_ff ) is ignored by syn