在 Python 中将以空格分隔的文本文件数据保存为 CSV 文件

Question

我有一个巨大的报告文件。我从中提取了所需的数据，保存到一个名为“new.txt”的新文件中。我想将这些数据保存为 CSV 文件，使文件中定义的每个表头（header）都能得到正确的列和行。

我无法得到像在 Excel 中那样的正确格式。我使用的是 Python 2.7，并且希望在不使用 pandas 包的情况下完成。

SIMPLE_FILE 报告：

CALL            Alias           Severity   File                                                                                                                                                                 Line   Wt   Message
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                785    1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1111   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1226   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1354   1000 message for this block ( syn_ff ) is ignored by syn
ACT_99          ACT_99          Warning   /application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png                                                                                                1363   1000 message for this block ( syn_ff ) is ignored by syn

这是我的代码。

import os
import sys
from os import path
import struct

# Output locations: the extracted report text and the CSV generated from it.
csv_file = "report.csv"
outFile = "new.txt"

# Parsing state, assigned by calclens() when it processes the header row.
fs = None   # struct.Struct describing the fixed-width columns
reqlen = 0  # minimum length of a row

# Open the intermediate file for appending/reading and empty it, so that
# every run starts from a blank file.
new_file = open(outFile, "a+")
new_file.truncate(0)
def calclens(line):  # calc column widths from header row
    """Derive the fixed-width column layout from the report's header row.

    A column starts at the first character of the header and at every
    non-space character that directly follows a space, and it runs up to
    the start of the next column.  The last column is extended to the end
    of *line* (line terminator included), so the widths always add up to
    ``len(line)``.

    The result is stored in module globals: ``fs`` becomes a
    ``struct.Struct`` with one ``<width>s`` field per column and ``reqlen``
    becomes ``len(line)``.

    Raises:
        ValueError: if *line* contains no header text at all.
    """
    global fs, reqlen

    # Trailing blanks and the line terminator must not be mistaken for a
    # column of their own, whether or not the header was padded.
    header = line.rstrip()
    if not header:
        raise ValueError("header row is empty, cannot determine column widths")

    starts = [0]
    for pos in range(1, len(header)):
        if header[pos - 1] == ' ' and header[pos] != ' ':  # new column
            starts.append(pos)

    # Each column ends where the next one begins; the last one ends with the line.
    ends = starts[1:] + [len(line)]
    fieldwidths = [end - start for start, end in zip(starts, ends)]

    fmtstring = ' '.join('{}s'.format(fw) for fw in fieldwidths)  # e.g. "16s 16s 10s"
    fs = struct.Struct(fmtstring)
    reqlen = len(line)
   
   
def open_file(filename):
    """Extract the report section of *filename* and convert it to CSV.

    Every line from the "MORESIMPLE_FILESORT REPORT:" marker onwards is
    copied (stripped of surrounding whitespace) to the intermediate file
    ``new_file``.  That file is then read back, each row is split into
    fixed-width columns using the layout ``calclens`` derives from the
    header row, and the trimmed fields are written comma separated to
    ``csv_file``.

    Any error is printed and the process exits with status 1; on success
    the function simply returns.
    """
    try:
        with open(filename, 'r') as f1:
            contents = [line.strip() for line in f1]
        counter = contents.index("MORESIMPLE_FILESORT REPORT:")
        for item in contents[counter:]:
            new_file.write(item + "\n")
        # The intermediate file is re-opened for reading below, so buffered
        # writes have to reach the disk first.
        new_file.flush()

        with open(outFile) as report:
            with open(csv_file, 'w') as f2:
                for i, line in enumerate(report):
                    if i == 0:
                        continue  # report title (marker) line
                    if line[0] == '+':
                        continue  # skip ++++ line
                    if i == 1:
                        calclens(line)  # header row, calc field positions/lengths
                    if len(line) < reqlen:
                        line = line.ljust(reqlen)

                    # struct works on bytes: encode the row, decode each field.
                    # NOTE(review): offsets are counted in characters, so this
                    # assumes single-byte (ASCII) report text.
                    raw = line.encode()
                    cells = list(fs.unpack_from(raw))
                    # Rows may be longer than the header; whatever lies beyond
                    # the header's width belongs to the last column.
                    cells[-1] += raw[fs.size:]

                    fields = [cell.decode().strip() for cell in cells]  # trim all fields
                    f2.write(','.join(fields) + '\n')  # join fields comma separated

    except Exception as e:
        print(str(e))
        sys.exit(1)


if __name__ == '__main__':
    # The report to convert is passed as the first command-line argument.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: {0} REPORT_FILE\n".format(sys.argv[0]))
        sys.exit(2)
    filename = sys.argv[1]
    open_file(filename)

Answer 1

要拆分固定宽度的文本文件，您可以使用 struct 模块。

下面的代码使用 header 行来确定列的位置和宽度，因此 header 行的长度必须与数据行的长度相同，并且各个 header 必须正确对齐。

此代码针对 Python 2.7；在 Python 3 下运行所需的改动已在代码注释中说明。

import struct

# Column widths of the sample report.  Building the rows from these widths
# keeps the header padded to the full row length without relying on invisible
# trailing blanks that an editor could silently strip.
_WIDTHS = (16, 16, 10, 166, 7, 5, 51)


def _fixed_row(*cells):
    """Return *cells* left-justified to ``_WIDTHS`` and joined into one line."""
    return ''.join(cell.ljust(width) for cell, width in zip(cells, _WIDTHS))


_SAMPLE_PATH = '/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png'
_SAMPLE_MESSAGE = 'message for this block ( syn_ff ) is ignored by syn'

ss = '\n'.join(
    [
        'SIMPLE_FILE REPORT:',
        _fixed_row('CALL', 'Alias', 'Severity', 'File', 'Line', 'Wt', 'Message'),
        '+' * 79,  # separator line; the conversion skips lines starting with '+'
    ]
    + [
        _fixed_row('ACT_99', 'ACT_99', 'Warning', _SAMPLE_PATH, line_no, '1000', _SAMPLE_MESSAGE)
        for line_no in ('785', '1111', '1226', '1354', '1363')
    ]
).strip()

with open('data.txt', 'w') as f:
    f.write(ss)  # write test file

##################### Main Script #######################

# Module-level parsing state, assigned by calclens() from the header row.
fs = None   # struct.Struct describing the fixed-width columns
reqlen = 0  # minimum length of a row

def getmaxlinelen(filename):
    """Return the length of the longest line in *filename* (0 for an empty file).

    Lengths are those of the lines as read in text mode, i.e. they include
    the trailing newline where a line has one.
    """
    with open(filename) as f:
        lengths = [len(ln) for ln in f]
    return max(lengths) if lengths else 0


def calclens(line, mxlen):  # calc column widths from header row
    """Derive the fixed-width column layout from the report's header row.

    A column starts at the first character of the header and at every
    non-space character that directly follows a space, and it runs up to
    the start of the next column.  The last column is extended to *mxlen*
    (the length of the longest line in the file), so the widths always add
    up to ``mxlen``.

    The result is stored in module globals: ``fs`` becomes a
    ``struct.Struct`` with one ``<width>s`` field per column and ``reqlen``
    becomes ``mxlen``.

    Raises:
        ValueError: if *line* contains no header text, or if *mxlen* does
            not reach past the start of the last column.
    """
    global fs, reqlen

    # Trailing blanks and the line terminator must not be mistaken for a
    # column of their own, whether or not the header was padded.
    header = line.rstrip()
    if not header:
        raise ValueError("header row is empty, cannot determine column widths")

    starts = [0]
    for pos in range(1, len(header)):
        if header[pos - 1] == ' ' and header[pos] != ' ':  # new column
            starts.append(pos)

    if mxlen <= starts[-1]:
        raise ValueError("mxlen (%d) is shorter than the header row" % mxlen)

    # Each column ends where the next one begins; the last one runs to the
    # end of the longest data line.
    ends = starts[1:] + [mxlen]
    fieldwidths = [end - start for start, end in zip(starts, ends)]

    fmtstring = ' '.join('{}s'.format(fw) for fw in fieldwidths)  # 16s 16s 10s 166s 7s 5s 52s
    fs = struct.Struct(fmtstring)
    reqlen = mxlen

# Longest line in the input; every row is padded to this length so that the
# struct layout (whose last column extends to mxlen) fits each row.
mxlen = getmaxlinelen('data.txt')

with open('data.txt') as src:
    with open('data.csv', 'w') as dst:
        for i, line in enumerate(src):
            if i == 0:
                continue  # SIMPLE_FILE REPORT: title line
            if line[0] == '+':
                continue  # skip ++++ line
            if i == 1:
                calclens(line, mxlen)  # header row, calc field positions/lengths
            if len(line) < reqlen:
                line = line.ljust(reqlen)  # line length must match longest length

            # Works on Python 2.7 and Python 3: struct operates on bytes, so the
            # row is encoded for unpacking and every field is decoded to text.
            # NOTE(review): offsets are counted in characters, so this assumes
            # single-byte (ASCII) report text.
            raw_fields = fs.unpack_from(line.encode())
            # Trim all fields; commas are removed so they cannot break the CSV.
            fields = [cell.decode().strip().replace(',', '') for cell in raw_fields]
            dst.write(','.join(fields) + '\n')  # join fields comma separated

输出（data.csv）

CALL,Alias,Severity,File,Line,Wt,Message
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,785,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1111,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1226,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1354,1000,message for this block ( syn_ff ) is ignored by syn
ACT_99,ACT_99,Warning,/application/XX/VV/2019_2_1/VV/2019.2/data/ip/xm/xm_123/ldm/xm_123.png,1363,1000,message for this block ( syn_ff ) is ignored by syn

在 Python 中将以空格分隔的文本文件数据保存为 CSV 文件

saving data from text file to CSV file in python having delimiter as space

csv

file

text-files

python-2.7