从 python 中的文本文件向 csv 添加一个值

Add one more value to csv from text file in python

我正在将多个文本文件转换为一个 csv 文件。我的文本文件如下所示:

ITEM: TIMESTEP
55000
ITEM: NUMBER OF ATOMS
4365
ITEM: BOX BOUNDS ff ff ff
-0.3 0.3
-0.6 0.6
-0.6 0.6
ITEM: ATOMS id type x y z vx vy vz fx fy fz omegax omegay omegaz radius 
4356 1 -0.0885288 -0.0101421 -0.48871 -0.000941682 0.778688 -0.0153902 -0.00720861 -0.0533703 0.0104717 0.35581 -0.0601358 -0.436049 0.01 
4227 1 0.0157977 0.00542603 -0.488429 -0.00996111 0.784119 0.00813807 -0.000491847 0.0144889 -0.0120111 1.08208 -0.0671177 0.369492 0.01 
3973 1 0.0179724 0.0256167 -0.48799 -0.00582994 0.772455 0.0394544 0.0109589 -0.0187232 -0.00111718 -0.0586513 -0.162943 1.12784 0.01 
4300 1 0.0900919 0.0248592 -0.488025 -0.000455483 0.769978 0.0388239 -0.00364509 0.0409803 -0.00269227 3.94355 -0.0249566 -0.223111 0.01 
4200 1 -0.0230223 0.0329911 -0.483108 -0.00238 0.778547 0.0500186 0.0421189 -0.021588 0.05607 0.112989 -0.0813771 -1.09981 0.015 
4339 1 0.00143577 0.0368542 -0.488107 0.000587848 0.784672 0.0593572 0.00385562 -0.00475113 -0.00710483 -0.201196 0.158512 -5.63826 0.01 
4106 1 0.0648392 0.0269728 -0.483248 -0.00365836 0.766081 0.0395827 0.0418642 0.1802 0.0547313 -0.0578358 0.124205 -0.96464 0.015 
4104 1 -0.084453 0.0507114 -0.482726 -0.000596577 0.75636 0.0806599 0.000817826 0.0119286 -0.0150014 -0.0864852 -0.103877 0.198773 0.015

现在我的 csv 文件只包含第 9 行之后的值(在 python 代码中对应第 8 行)。我想在 csv 中也包含第 2 行的值(Header - TIMESTEP),以及第 9 行之后的所有值。

我尝试编辑我的代码但没有成功。我能得到一些帮助吗: 我的代码在这里:

import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Concatenate the atom rows of every dump file in ./all/ into all.csv.

    The column names are taken from line 9 of 'all/dump46000.data' (the
    "ITEM: ATOMS ..." line with its first two tokens dropped).  For each
    matching dump file the header is appended to all.csv, followed by every
    line after the first nine, with spaces turned into commas.  The TIMESTEP
    value on line 2 is not copied to the output.  Elapsed wall-clock time is
    printed at the end.
    """
    t0 = time.time()
    data_folder = "./all/"  # folder that holds the dump files
    dump_paths = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(dump_paths))

    # Take the column names from one sample file.
    with open('all/dump46000.data', 'r') as sample:
        skipped = 0
        while skipped < 8:
            next(sample)  # lines 1-8 precede the column line
            skipped += 1
        column_names = sample.readline().split()[2:]
        header = ','.join(column_names) + '\n'
        sample.readline()  # first atom row is consumed but never used
        print(header)

    for dump_path in dump_paths:
        # Append mode so rows from all files accumulate in one CSV.
        with open(dump_path, 'r') as src, open('all.csv', 'a') as dst:
            dst.write(header)
            for _ in range(9):
                next(src)  # drop the nine preamble lines
            for row in src:
                dst.write(','.join(row.rstrip().split(' ')) + '\n')

    print(time.time() - t0)


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()

我的文件夹 all 包含 600 多个文件:

['./all/dump501000.data', 
'./all/dump307000.data',
'./all/dump612000.data',
'./all/dump369000.data',
'./all/dump23000.data',
'./all/dump470000.data',
'./all/dump235000.data',
'./all/dump6000.data',
'./all/dump568000.data',
'./all/dump506000.data',
'./all/dump623000.data',
'./all/dump329000.data',
'./all/dump220000.data', 
.....................
....................

我想要这个来自文本文件的 csv 文件:

id type x y z vx vy vz fx fy fz omegax omegay omegaz radius TIMESTEP

但是我得到了这个 csv

id type x y z vx vy vz fx fy fz omegax omegay omegaz radius

谢谢

根据您的需要,以下是有效的方法

import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Append every dump file in ./all/ to all.csv with a TIMESTEP column.

    Each input file is expected to have this layout:

    * line 1: "ITEM: TIMESTEP"
    * line 2: the timestep value
    * lines 3-8: atom count and box bounds (ignored)
    * line 9: "ITEM: ATOMS <column names...>"
    * line 10 onwards: one whitespace-separated row per atom

    For each file a header line "<column names>,TIMESTEP" is written,
    followed by every atom row with that file's timestep appended as the
    last field.  Blank lines in the data section are skipped.  all.csv is
    opened in append mode, so output from earlier runs is kept.  Elapsed
    time is printed at the end.
    """
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))
    for file in files:
        # 'a' so that rows from all files accumulate in the same CSV.
        with open(file, 'r') as f, open('all.csv', 'a') as g:
            next(f)  # "ITEM: TIMESTEP" label
            timestep = f.readline().strip()
            for _ in range(6):
                next(f)  # atom count and box bounds (lines 3-8)
            # Drop the leading "ITEM:" and "ATOMS" tokens to get column names.
            columns = f.readline().split()[2:]
            g.write(','.join(columns + ['TIMESTEP']) + '\n')
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # ignore blank lines instead of emitting empty rows
                fields.append(timestep)
                g.write(','.join(fields) + '\n')
    print(time.perf_counter() - start)


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()

如果您需要任何其他语法或最终 CSV 中的其他内容,请告诉我。 另外,总是使用 time.perf_counter 来计时,这样更准确。

您可以尝试将 TIMESTEP 添加到 csv 数据中。我只是想知道您是否需要为每个文件打印 header。我的理解是您可以在顶部只打印一次 header。如果要为每个文件都打印它,请将其放入 for 循环中。

import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Append every dump file in ./all/ to all.csv, prefixing rows with TIMESTEP.

    The header is built from the first file returned by glob: the label on
    line 1 ("ITEM: TIMESTEP" -> "TIMESTEP") followed by the column names on
    line 9 ("ITEM: ATOMS ..." with its first two tokens dropped).

    For each file the header line is written, then every row after line 9
    with the file's timestep (line 2) inserted as the first field.  Blank
    lines in the data section are skipped.  all.csv is opened in append
    mode, so output from earlier runs is kept.  If no file matches, nothing
    is written.  Elapsed time is printed at the end.
    """
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))
    if not files:
        # Nothing to convert; avoid failing on a missing sample file.
        print(time.perf_counter() - start)
        return

    # Build the header from the first matched file rather than a fixed
    # file name that may not exist in the folder.
    with open(files[0], 'r') as f:
        header = f.readline().split()[1:]  # "ITEM: TIMESTEP" -> ["TIMESTEP"]
        for _ in range(7):
            next(f)  # timestep value, atom count and box bounds (lines 2-8)
        header.extend(f.readline().split()[2:])  # column names from line 9
    print(header)
    header_string = ','.join(header)

    for file in files:
        # 'a' so that rows from all files accumulate in the same CSV.
        with open(file, 'r') as f, open('all.csv', 'a') as g:
            next(f)  # "ITEM: TIMESTEP" label
            g.write(header_string + '\n')  # header is repeated per file
            time_step = f.readline().split()[0]
            for _ in range(7):
                next(f)  # lines 3-9: atom count, box bounds, column line
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # a blank line would otherwise yield a timestep-only row
                fields.insert(0, time_step)
                g.write(','.join(fields) + '\n')

    print(time.perf_counter() - start)


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()