从 python 中的文本文件向 csv 添加一个值
Add one more value to csv from text file in python
我正在将多个文本文件转换为一个 csv 文件。我的文本文件如下所示:
ITEM: TIMESTEP
55000
ITEM: NUMBER OF ATOMS
4365
ITEM: BOX BOUNDS ff ff ff
-0.3 0.3
-0.6 0.6
-0.6 0.6
ITEM: ATOMS id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
4356 1 -0.0885288 -0.0101421 -0.48871 -0.000941682 0.778688 -0.0153902 -0.00720861 -0.0533703 0.0104717 0.35581 -0.0601358 -0.436049 0.01
4227 1 0.0157977 0.00542603 -0.488429 -0.00996111 0.784119 0.00813807 -0.000491847 0.0144889 -0.0120111 1.08208 -0.0671177 0.369492 0.01
3973 1 0.0179724 0.0256167 -0.48799 -0.00582994 0.772455 0.0394544 0.0109589 -0.0187232 -0.00111718 -0.0586513 -0.162943 1.12784 0.01
4300 1 0.0900919 0.0248592 -0.488025 -0.000455483 0.769978 0.0388239 -0.00364509 0.0409803 -0.00269227 3.94355 -0.0249566 -0.223111 0.01
4200 1 -0.0230223 0.0329911 -0.483108 -0.00238 0.778547 0.0500186 0.0421189 -0.021588 0.05607 0.112989 -0.0813771 -1.09981 0.015
4339 1 0.00143577 0.0368542 -0.488107 0.000587848 0.784672 0.0593572 0.00385562 -0.00475113 -0.00710483 -0.201196 0.158512 -5.63826 0.01
4106 1 0.0648392 0.0269728 -0.483248 -0.00365836 0.766081 0.0395827 0.0418642 0.1802 0.0547313 -0.0578358 0.124205 -0.96464 0.015
4104 1 -0.084453 0.0507114 -0.482726 -0.000596577 0.75636 0.0806599 0.000817826 0.0119286 -0.0150014 -0.0864852 -0.103877 0.198773 0.015
目前我的 csv 文件只包含第 9 行之后的值(对应 Python 代码中的第 8 行)。
我还想把第 2 行的值(即 TIMESTEP 表头下面的数值)连同第 9 行之后的所有值一起写入 csv。
我尝试编辑我的代码但没有成功。我能得到一些帮助吗:
我的代码在这里:
import numpy as np
import pandas as pd
import csv
import glob
import time
def main():
    """Merge the atom rows of every ``./all/*dump*.data`` file into ``all.csv``.

    Each dump file is expected to start with nine preamble lines, the ninth
    being ``ITEM: ATOMS <column names...>``, followed by one
    whitespace-separated row per atom.  The column names of the first matched
    file become the single CSV header line.

    NOTE: the TIMESTEP value on line 2 of each dump is skipped together with
    the rest of the preamble, so the resulting CSV has no TIMESTEP column.
    """
    start = time.time()
    data_folder = "./all/"  # folder name
    # glob returns paths in arbitrary order; sort so the output is reproducible.
    files = sorted(glob.glob(data_folder + '*dump*.data'))
    print("Total files:", len(files))
    if not files:
        # Nothing to convert: do not create or truncate all.csv.
        print(time.time() - start)
        return

    # Read the header from the first matched file rather than from a
    # hard-coded sample name that may not exist in the folder.
    with open(files[0], 'r') as f:
        for _ in range(8):
            next(f, None)  # skip the 8 lines before "ITEM: ATOMS ..."
        # Drop the leading "ITEM:" and "ATOMS" tokens, keep the column names.
        header = ','.join(f.readline().split()[2:]) + '\n'
    print(header)

    # Open the output once in write mode: the header is written a single
    # time and re-running the script does not append duplicate rows.
    with open('all.csv', 'w') as g:
        g.write(header)
        for file in files:
            with open(file, 'r') as f:
                for _ in range(9):
                    next(f, None)  # skip the 9 preamble lines
                for line in f:
                    # split() collapses repeated spaces/tabs, so no empty
                    # CSV fields are produced; blank lines are ignored.
                    fields = line.split()
                    if fields:
                        g.write(','.join(fields) + '\n')
    print(time.time() - start)


if __name__ == "__main__":
    main()
我的文件夹 all
包含 600 多个文件:
['./all/dump501000.data',
'./all/dump307000.data',
'./all/dump612000.data',
'./all/dump369000.data',
'./all/dump23000.data',
'./all/dump470000.data',
'./all/dump235000.data',
'./all/dump6000.data',
'./all/dump568000.data',
'./all/dump506000.data',
'./all/dump623000.data',
'./all/dump329000.data',
'./all/dump220000.data',
.....................
....................
我想要这个来自文本文件的 csv 文件:
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius TIMESTEP
但是我得到了这个 csv
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
谢谢
根据您的需要,以下是有效的方法
import numpy as np
import pandas as pd
import csv
import glob
import time
def main():
    """Combine every ``./all/*dump*.data`` file into ``all.csv`` with a TIMESTEP column.

    Each dump file is expected to look like::

        ITEM: TIMESTEP
        <timestep>
        ... six more preamble lines ...
        ITEM: ATOMS <column names...>
        <one whitespace-separated row per atom>

    The CSV header is the atom column names followed by ``TIMESTEP``; every
    atom row gets the timestep of its source file appended as the last field.
    """
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    # glob returns paths in arbitrary order; sort so the output is reproducible.
    files = sorted(glob.glob(data_folder + '*dump*.data'))
    print("Total files:", len(files))
    if not files:
        # Nothing to convert: do not create or truncate all.csv.
        print(time.perf_counter() - start)
        return

    header_written = False
    # Open the output once in write mode so the header appears a single time
    # and re-running the script does not append duplicate rows.
    with open('all.csv', 'w') as g:
        for file in files:
            with open(file, 'r') as f:
                # Line 1 is "ITEM: TIMESTEP"; keep only the label and strip
                # the newline so no blank line ends up in the CSV.
                label = f.readline().split("ITEM: ")[-1].strip()
                # Line 2 holds the timestep value for this file.
                timestep = f.readline().strip()
                for _ in range(6):
                    next(f, None)  # skip lines 3-8 (atom count, box bounds)
                # Line 9 is "ITEM: ATOMS <columns>": it is a header, not data.
                columns = f.readline().split()[2:]
                if not header_written:
                    header = ','.join(columns + [label])
                    print(header)
                    g.write(header + '\n')
                    header_written = True
                for line in f:
                    fields = line.split()
                    if fields:  # ignore blank lines
                        g.write(','.join(fields + [timestep]) + '\n')
    print(time.perf_counter() - start)


if __name__ == "__main__":
    main()
如果您需要任何其他语法或最终 CSV 中的其他内容,请告诉我。
另外,总是使用 time.perf_counter 来计时,这样更准确。
enter code here
您可以尝试将 TIMESTEP 添加到 csv 数据中。我只是想知道您是否需要为每个文件打印 header。我的理解是你可以在顶部打印一次 header 。如果要为每个文件打印它,请将其带入 for 循环。
import numpy as np
import pandas as pd
import csv
import glob
import time
def main():
    """Combine every ``./all/*dump*.data`` file into ``all.csv``, TIMESTEP first.

    Each dump file is expected to start with ``ITEM: TIMESTEP``, the timestep
    value on line 2, and ``ITEM: ATOMS <column names...>`` on line 9, followed
    by one whitespace-separated row per atom.  The CSV header (written once,
    at the top) is ``TIMESTEP`` followed by the atom column names; every atom
    row is prefixed with the timestep of the file it came from.
    """
    start = time.time()
    data_folder = "./all/"  # folder name
    # glob returns paths in arbitrary order; sort so the output is reproducible.
    files = sorted(glob.glob(data_folder + '*dump*.data'))
    print("Total files:", len(files))
    if not files:
        # Nothing to convert: do not create or truncate all.csv.
        print(time.time() - start)
        return

    # Build the header from the first matched file rather than from a
    # hard-coded sample name that may not exist in the folder.
    header = []
    with open(files[0], 'r') as f:
        header.extend(f.readline().split()[1:])  # "ITEM: TIMESTEP" -> TIMESTEP
        for _ in range(7):
            next(f, None)  # skip lines 2-8
        header.extend(f.readline().split()[2:])  # "ITEM: ATOMS ..." -> columns
    print(header)
    headerString = ','.join(header)

    # Open the output once in write mode so the header appears a single time
    # and re-running the script does not append duplicate rows.
    with open('all.csv', 'w') as g:
        g.write(headerString + '\n')
        for file in files:
            with open(file, 'r') as f:
                next(f, None)  # line 1: "ITEM: TIMESTEP"
                timeStep = f.readline().split()  # line 2: the timestep value
                for _ in range(7):
                    next(f, None)  # skip lines 3-9, including the ATOMS header
                for line in f:
                    file_line = line.split()
                    if not file_line:
                        continue  # ignore blank lines
                    # timeStep[:1] tolerates a missing timestep line.
                    g.write(','.join(timeStep[:1] + file_line) + '\n')
    print(time.time() - start)


if __name__ == "__main__":
    main()
我正在将多个文本文件转换为一个 csv 文件。我的文本文件如下所示:
ITEM: TIMESTEP
55000
ITEM: NUMBER OF ATOMS
4365
ITEM: BOX BOUNDS ff ff ff
-0.3 0.3
-0.6 0.6
-0.6 0.6
ITEM: ATOMS id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
4356 1 -0.0885288 -0.0101421 -0.48871 -0.000941682 0.778688 -0.0153902 -0.00720861 -0.0533703 0.0104717 0.35581 -0.0601358 -0.436049 0.01
4227 1 0.0157977 0.00542603 -0.488429 -0.00996111 0.784119 0.00813807 -0.000491847 0.0144889 -0.0120111 1.08208 -0.0671177 0.369492 0.01
3973 1 0.0179724 0.0256167 -0.48799 -0.00582994 0.772455 0.0394544 0.0109589 -0.0187232 -0.00111718 -0.0586513 -0.162943 1.12784 0.01
4300 1 0.0900919 0.0248592 -0.488025 -0.000455483 0.769978 0.0388239 -0.00364509 0.0409803 -0.00269227 3.94355 -0.0249566 -0.223111 0.01
4200 1 -0.0230223 0.0329911 -0.483108 -0.00238 0.778547 0.0500186 0.0421189 -0.021588 0.05607 0.112989 -0.0813771 -1.09981 0.015
4339 1 0.00143577 0.0368542 -0.488107 0.000587848 0.784672 0.0593572 0.00385562 -0.00475113 -0.00710483 -0.201196 0.158512 -5.63826 0.01
4106 1 0.0648392 0.0269728 -0.483248 -0.00365836 0.766081 0.0395827 0.0418642 0.1802 0.0547313 -0.0578358 0.124205 -0.96464 0.015
4104 1 -0.084453 0.0507114 -0.482726 -0.000596577 0.75636 0.0806599 0.000817826 0.0119286 -0.0150014 -0.0864852 -0.103877 0.198773 0.015
目前我的 csv 文件只包含第 9 行之后的值(对应 Python 代码中的第 8 行)。我还想把第 2 行的值(即 TIMESTEP 表头下面的数值)连同第 9 行之后的所有值一起写入 csv。
我尝试修改了代码,但没有成功。能帮帮我吗?我的代码在这里:
import numpy as np
import pandas as pd
import csv
import glob
import time
def main():
    """Merge the atom rows of every ``./all/*dump*.data`` file into ``all.csv``.

    Each dump file is expected to start with nine preamble lines, the ninth
    being ``ITEM: ATOMS <column names...>``, followed by one
    whitespace-separated row per atom.  The column names of the first matched
    file become the single CSV header line.

    NOTE: the TIMESTEP value on line 2 of each dump is skipped together with
    the rest of the preamble, so the resulting CSV has no TIMESTEP column.
    """
    start = time.time()
    data_folder = "./all/"  # folder name
    # glob returns paths in arbitrary order; sort so the output is reproducible.
    files = sorted(glob.glob(data_folder + '*dump*.data'))
    print("Total files:", len(files))
    if not files:
        # Nothing to convert: do not create or truncate all.csv.
        print(time.time() - start)
        return

    # Read the header from the first matched file rather than from a
    # hard-coded sample name that may not exist in the folder.
    with open(files[0], 'r') as f:
        for _ in range(8):
            next(f, None)  # skip the 8 lines before "ITEM: ATOMS ..."
        # Drop the leading "ITEM:" and "ATOMS" tokens, keep the column names.
        header = ','.join(f.readline().split()[2:]) + '\n'
    print(header)

    # Open the output once in write mode: the header is written a single
    # time and re-running the script does not append duplicate rows.
    with open('all.csv', 'w') as g:
        g.write(header)
        for file in files:
            with open(file, 'r') as f:
                for _ in range(9):
                    next(f, None)  # skip the 9 preamble lines
                for line in f:
                    # split() collapses repeated spaces/tabs, so no empty
                    # CSV fields are produced; blank lines are ignored.
                    fields = line.split()
                    if fields:
                        g.write(','.join(fields) + '\n')
    print(time.time() - start)


if __name__ == "__main__":
    main()
我的文件夹 all
包含 600 多个文件:
['./all/dump501000.data',
'./all/dump307000.data',
'./all/dump612000.data',
'./all/dump369000.data',
'./all/dump23000.data',
'./all/dump470000.data',
'./all/dump235000.data',
'./all/dump6000.data',
'./all/dump568000.data',
'./all/dump506000.data',
'./all/dump623000.data',
'./all/dump329000.data',
'./all/dump220000.data',
.....................
....................
我想要这个来自文本文件的 csv 文件:
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius TIMESTEP
但是我得到了这个 csv
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
谢谢
根据您的需要,以下是有效的方法
import numpy as np
import pandas as pd
import csv
import glob
import time
def main():
    """Combine every ``./all/*dump*.data`` file into ``all.csv`` with a TIMESTEP column.

    Each dump file is expected to look like::

        ITEM: TIMESTEP
        <timestep>
        ... six more preamble lines ...
        ITEM: ATOMS <column names...>
        <one whitespace-separated row per atom>

    The CSV header is the atom column names followed by ``TIMESTEP``; every
    atom row gets the timestep of its source file appended as the last field.
    """
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    # glob returns paths in arbitrary order; sort so the output is reproducible.
    files = sorted(glob.glob(data_folder + '*dump*.data'))
    print("Total files:", len(files))
    if not files:
        # Nothing to convert: do not create or truncate all.csv.
        print(time.perf_counter() - start)
        return

    header_written = False
    # Open the output once in write mode so the header appears a single time
    # and re-running the script does not append duplicate rows.
    with open('all.csv', 'w') as g:
        for file in files:
            with open(file, 'r') as f:
                # Line 1 is "ITEM: TIMESTEP"; keep only the label and strip
                # the newline so no blank line ends up in the CSV.
                label = f.readline().split("ITEM: ")[-1].strip()
                # Line 2 holds the timestep value for this file.
                timestep = f.readline().strip()
                for _ in range(6):
                    next(f, None)  # skip lines 3-8 (atom count, box bounds)
                # Line 9 is "ITEM: ATOMS <columns>": it is a header, not data.
                columns = f.readline().split()[2:]
                if not header_written:
                    header = ','.join(columns + [label])
                    print(header)
                    g.write(header + '\n')
                    header_written = True
                for line in f:
                    fields = line.split()
                    if fields:  # ignore blank lines
                        g.write(','.join(fields + [timestep]) + '\n')
    print(time.perf_counter() - start)


if __name__ == "__main__":
    main()
如果您需要任何其他语法或最终 CSV 中的其他内容,请告诉我。 另外,总是使用 time.perf_counter 来计时,这样更准确。
enter code here
您可以尝试将 TIMESTEP 添加到 csv 数据中。我只是想知道您是否需要为每个文件打印 header。我的理解是你可以在顶部打印一次 header 。如果要为每个文件打印它,请将其带入 for 循环。
import numpy as np
import pandas as pd
import csv
import glob
import time
def main():
    """Combine every ``./all/*dump*.data`` file into ``all.csv``, TIMESTEP first.

    Each dump file is expected to start with ``ITEM: TIMESTEP``, the timestep
    value on line 2, and ``ITEM: ATOMS <column names...>`` on line 9, followed
    by one whitespace-separated row per atom.  The CSV header (written once,
    at the top) is ``TIMESTEP`` followed by the atom column names; every atom
    row is prefixed with the timestep of the file it came from.
    """
    start = time.time()
    data_folder = "./all/"  # folder name
    # glob returns paths in arbitrary order; sort so the output is reproducible.
    files = sorted(glob.glob(data_folder + '*dump*.data'))
    print("Total files:", len(files))
    if not files:
        # Nothing to convert: do not create or truncate all.csv.
        print(time.time() - start)
        return

    # Build the header from the first matched file rather than from a
    # hard-coded sample name that may not exist in the folder.
    header = []
    with open(files[0], 'r') as f:
        header.extend(f.readline().split()[1:])  # "ITEM: TIMESTEP" -> TIMESTEP
        for _ in range(7):
            next(f, None)  # skip lines 2-8
        header.extend(f.readline().split()[2:])  # "ITEM: ATOMS ..." -> columns
    print(header)
    headerString = ','.join(header)

    # Open the output once in write mode so the header appears a single time
    # and re-running the script does not append duplicate rows.
    with open('all.csv', 'w') as g:
        g.write(headerString + '\n')
        for file in files:
            with open(file, 'r') as f:
                next(f, None)  # line 1: "ITEM: TIMESTEP"
                timeStep = f.readline().split()  # line 2: the timestep value
                for _ in range(7):
                    next(f, None)  # skip lines 3-9, including the ATOMS header
                for line in f:
                    file_line = line.split()
                    if not file_line:
                        continue  # ignore blank lines
                    # timeStep[:1] tolerates a missing timestep line.
                    g.write(','.join(timeStep[:1] + file_line) + '\n')
    print(time.time() - start)


if __name__ == "__main__":
    main()