在 Python 中将文本文件(以空格为分隔符)中的数据保存为 CSV 文件

saving data from text file to CSV file in python having delimiter as space

我有一个巨大的报告文件。我从中提取了所需的数据,保存到一个名为“new.txt”的新文件中。我想将这些数据保存为 CSV 文件,以便文件中定义的每个 header 都能正确对应到相应的列和行。

但我无法得到像在 Excel 中打开时那样的正确格式。我使用的是 Python 2.7,并且希望在不使用 pandas 包的情况下完成。

SIMPLE_FILE 报告:

CALL            Alias           Severity   File                                                                                                                                                                 Line   Wt   Message
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                785    1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1111   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1226   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1354   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1363   1000 message for this block ( syn_ff ) is ignored by syn

这是我的代码。

import os
import sys
from os import path
import struct

# Fixed file names used by the script.
outFile = "new.txt"      # scratch file that receives the extracted report lines
csv_file = "report.csv"  # CSV file written by open_file()

# Open the scratch file and empty it, so each run starts from a blank file.
new_file = open(outFile, "a+")
new_file.truncate(0)

# State filled in by calclens() from the header row.
fs = None   # struct.Struct describing the column layout
reqlen = 0  # minimum length of row
def calclens(line):  # calc column widths from header row
    """Derive the fixed-width column layout from the header row *line*.

    A column ends at every space that is immediately followed by a
    non-space character.  The collected widths are compiled into a
    ``struct.Struct`` stored in the global ``fs``; the global ``reqlen``
    is set to ``len(line)``.

    Raises IndexError when *line* contains no such column boundary.
    """
    global fs, reqlen
    widths = []  # width of every completed column
    run = 0      # characters seen since the previous boundary
    for cur, nxt in zip(line, line[1:]):
        run += 1
        if cur == ' ' and nxt != ' ':  # nxt starts a new column
            widths.append(run)
            run = 0

    # Text after the final boundary gets no field of its own; the last
    # recorded column is only widened by one character.
    widths[-1] += 1

    fmtstring = ' '.join('%ds' % w for w in widths)  # e.g. "16s 16s 10s"
    fs = struct.Struct(fmtstring)
    reqlen = len(line)
   
   
def open_file(filename):
    """Extract the report section of *filename* and write it as CSV.

    Every line from the ``"MORESIMPLE_FILESORT REPORT:"`` marker onwards is
    copied, whitespace-stripped, into the module-level scratch file
    ``new_file``.  ``new.txt`` is then re-read: the first line and every
    line starting with ``+`` are skipped, the second line is passed to
    ``calclens`` to set up the column layout, and that line plus all
    following ones are split with the global ``fs`` and written
    comma-separated to ``csv_file``.

    Any exception (for example a missing marker line) is printed and the
    process exits with status 1.  On success the function returns normally.
    """
    try:
        with open(filename, 'r') as f1:
            # NOTE(review): strip() also removes leading/trailing blanks,
            # which fixed-width parsing may depend on -- confirm.
            contents = [line.strip() for line in f1]
        counter = contents.index("MORESIMPLE_FILESORT REPORT:")
        for item in contents[counter:]:
            new_file.write(item + "\n")
        # The scratch file is re-opened by name below; push buffered writes
        # to disk first so the second handle sees all of them.
        new_file.flush()

        with open("new.txt") as f:
            with open(csv_file, 'w') as f2:
                for i, line in enumerate(f):
                    if i == 0:
                        continue  # SIMPLE_FILE REPORT:
                    if line[0] == '+':
                        continue  # skip ++++ line
                    if i == 1:
                        calclens(line)  # header row, calc field positions/lengths
                    if len(line) < reqlen:
                        line += ' ' * (reqlen - len(line))

                    # struct works on bytes: Python 3 text must be encoded
                    # first, Python 2 str already is bytes.
                    raw = line if isinstance(line, bytes) else line.encode()
                    fields = fs.unpack_from(raw)
                    # Python 3 gets bytes back; turn them into text again.
                    fields = [fld if isinstance(fld, str) else fld.decode()
                              for fld in fields]
                    fields = [fld.strip() for fld in fields]  # trim all fields

                    f2.write(','.join(fields) + '\n')  # join fields comma separated

    except Exception as e:
        print(str(e))
        # Signal failure only when something actually went wrong.
        sys.exit(1)


if __name__ == '__main__':
    # The report file to convert is the first command-line argument.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s REPORT_FILE\n" % sys.argv[0])
        sys.exit(2)
    filename = sys.argv[1]
    open_file(filename)

要拆分固定宽度的文本文件,您可以使用 struct 模块。

下面的代码使用 header 行来确定各列的位置和宽度,因此 header 行的长度必须与数据行的长度相同,并且各个 header 必须与其所在的列正确对齐。

此代码针对 Python 2.7;Python 3 所需的改动已在代码注释中注明。

import struct

ss = '''
SIMPLE_FILE REPORT:
CALL            Alias           Severity  File                                                                                                                                                                  Line   Wt   Message                                            
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                785    1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1111   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1226   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1354   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1363   1000 message for this block ( syn_ff ) is ignored by syn
'''.strip()

# write test file
with open('data.txt', 'w') as f:
    f.write(ss)

##################### Main Script #######################

# Shared state, filled in by calclens() once the header row has been read.
fs = None   # struct.Struct for the column layout
reqlen = 0  # minimum length of row

def getmaxlinelen(filename):  # scan file for longest line length
    """Return the length of the longest line in *filename*.

    Lengths are those of the lines as read in text mode, so a trailing
    newline counts.  An empty file yields 0.
    """
    longest = 0
    with open(filename) as src:
        for text in src:
            longest = max(longest, len(text))
    return longest


def calclens(line, mxlen):  # calc column widths from header row
    """Derive the fixed-width column layout from the header row *line*.

    A column starts at offset 0 and at every non-space character that
    directly follows a space.  Each column runs up to the start of the
    next one; the last column runs to *mxlen* (the longest line in the
    file), so it also covers data that is longer than the header.

    The layout is compiled into a ``struct.Struct`` stored in the global
    ``fs``; the global ``reqlen`` is set to the total row length every
    line must be padded to.

    Unlike a scan that relies on the header ending in "space + newline",
    this keeps the last column when the header has no trailing padding
    or no trailing newline.
    """
    global fs, reqlen
    header = line.rstrip('\r\n')  # the line terminator is not a column

    starts = [0]
    for pos in range(1, len(header)):
        if header[pos - 1] == ' ' and header[pos] != ' ':  # new column
            starts.append(pos)

    # Never let the layout be shorter than the header line itself.
    total = max(mxlen, len(line))
    bounds = starts + [total]
    fieldwidths = [end - begin for begin, end in zip(bounds, bounds[1:])]

    fmtstring = ' '.join('{}s'.format(fw) for fw in fieldwidths)  # e.g. "16s 16s 10s"
    fs = struct.Struct(fmtstring)
    reqlen = total

mxlen = getmaxlinelen('data.txt')

# Convert the fixed-width report into CSV, one output row per report line.
with open('data.txt') as f:
    with open('data.csv', 'w') as f2:
        for i, line in enumerate(f):
            if i == 0:
                continue  # SIMPLE_FILE REPORT:
            if line.startswith('+'):
                continue  # skip ++++ line
            if i == 1:
                calclens(line, mxlen)  # header row, calc field positions/lengths
            if len(line) < reqlen:
                line += ' ' * (reqlen - len(line))  # line length must match longest length

            # struct works on bytes.  Python 2: str already is bytes.
            # Python 3: text has to be encoded before unpacking ...
            raw = line if isinstance(line, bytes) else line.encode()
            fields = fs.unpack_from(raw)
            # ... and the unpacked bytes decoded back to text.
            fields = [fld if isinstance(fld, str) else fld.decode()
                      for fld in fields]

            # Trim every field; commas are dropped so they cannot be
            # mistaken for the CSV separator.
            fields = [fld.strip().replace(',', '') for fld in fields]
            f2.write(','.join(fields) + '\n')  # join fields comma separated

输出(data.csv)

CALL,Alias,Severity,File,Line,Wt,Message
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,785,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1111,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1226,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1354,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1363,1000,message for this block ( syn_ff ) is ignored by syn