从 .txt 文件中提取以空格分隔的列,并保存到新的数据框中
Extract space-separated columns from .txt and add to save in new dataframe
我是 Python 新手,正在尝试编写一个脚本:循环遍历一个文件夹,读取其中所有包含两列数据(仅以空格分隔)的 .txt 文件。然后我只想取出这些 .txt 文件的第二列,并把它们保存到一个新的数据框中,以 'lag' 作为索引、以文件名作为列标题(header)。我有点卡住了,因为除了打印文件名之外,我似乎无法再进一步。任何帮助都将不胜感激。(PS:对那段令人尴尬的 -50 到 50 的列表表示歉意——我知道有更高效的写法,但没找到能处理负值的方法。)
提前致谢。
def changeFolder(self):
    """Collect the second column of every .txt file in a chosen folder into one CSV.

    Asks the user for a directory, reads each ``.txt`` file in it as a
    whitespace-separated table without a header row, and stores the second
    column of each file under the file's name (without extension). The
    combined table is indexed by the time lag (-50 .. 50) and written to a
    fixed CSV path, after which a confirmation dialog is shown.

    Returns:
        None. Returns early without doing anything if the dialog is cancelled.

    Raises:
        IndexError: if a ``.txt`` file has fewer than two columns.
    """
    # Local imports, as in the original; only the modules actually used are kept.
    import os
    import pandas as pd

    folder = QFileDialog.getExistingDirectory(None, 'Project Data', '.csv files')
    # The dialog yields an empty string (not None) when the user cancels,
    # so test truthiness instead of comparing against None.
    if not folder:
        return
    print(folder)

    # Time lags from -50 to 50 inclusive; range() handles negative starts.
    lag = range(-50, 51)

    columns = {}
    # Sort the listing so the column order of the output is reproducible.
    for name in sorted(os.listdir(folder)):
        if not name.endswith(".txt"):
            continue
        print(name)
        # os.listdir returns bare names, so the folder must be joined back on.
        # r"\s+" accepts one or more spaces between the two columns.
        table = pd.read_csv(os.path.join(folder, name), sep=r"\s+", header=None)
        columns[os.path.splitext(name)[0]] = table.iloc[:, 1]

    df = pd.DataFrame(columns)
    # Align to exactly one row per lag value: shorter files are padded with
    # NaN and longer ones are cut, so assigning the index below cannot fail.
    df = df.reindex(range(len(lag)))
    df.index = pd.Index(lag, name='lag')

    # Save the combined table to the fixed output location.
    df.to_csv(r'D:\GLaDOS-CAMPUS\data\TestData-AB\ExtractedABFiles.csv')

    # Dialogue box shown after the CSV has been written.
    mbox = QMessageBox()
    mbox.setText("Hopefully this worked!")
    mbox.setDetailedText("")
    mbox.setStandardButtons(QMessageBox.Ok)
    mbox.setWindowTitle('CSV Batch Processor')
    mbox.exec_()
试试这个:
import os
from pathlib import Path

import pandas as pd

# Time lags from -50 to 50 inclusive; range() handles negative starts.
lag = list(range(-50, 51))
df1 = pd.DataFrame(lag)

source_dir = r"C:\Users\Wilian\Documents\DPROF"
# Sort the listing so the column order of the output is reproducible.
for file in sorted(os.listdir(source_dir)):
    if file.endswith(".txt"):
        # os.listdir returns bare names, so join the directory back on.
        # The files are space-separated and have no header row; without
        # header=None the first data row would be consumed as column names
        # and every value would shift by one against the lag column.
        df2 = pd.read_csv(os.path.join(source_dir, file), sep=r"\s+", header=None)
        # Keep only the second column, labelled with the file name sans extension.
        df1[Path(file).stem] = df2.iloc[:, 1]

# Column 0 holds the lag values; make it the index of the result.
df1.set_index(0, inplace=True)
df1.to_csv(r'D:\GLaDOS-CAMPUS\data\TestData-AB\ExtractedABFiles.csv')
我是 Python 新手,正在尝试编写一个脚本:循环遍历一个文件夹,读取其中所有包含两列数据(仅以空格分隔)的 .txt 文件。然后我只想取出这些 .txt 文件的第二列,并把它们保存到一个新的数据框中,以 'lag' 作为索引、以文件名作为列标题(header)。我有点卡住了,因为除了打印文件名之外,我似乎无法再进一步。任何帮助都将不胜感激。(PS:对那段令人尴尬的 -50 到 50 的列表表示歉意——我知道有更高效的写法,但没找到能处理负值的方法。)提前致谢。
def changeFolder(self):
    """Collect the second column of every .txt file in a chosen folder into one CSV.

    Asks the user for a directory, reads each ``.txt`` file in it as a
    whitespace-separated table without a header row, and stores the second
    column of each file under the file's name (without extension). The
    combined table is indexed by the time lag (-50 .. 50) and written to a
    fixed CSV path, after which a confirmation dialog is shown.

    Returns:
        None. Returns early without doing anything if the dialog is cancelled.

    Raises:
        IndexError: if a ``.txt`` file has fewer than two columns.
    """
    # Local imports, as in the original; only the modules actually used are kept.
    import os
    import pandas as pd

    folder = QFileDialog.getExistingDirectory(None, 'Project Data', '.csv files')
    # The dialog yields an empty string (not None) when the user cancels,
    # so test truthiness instead of comparing against None.
    if not folder:
        return
    print(folder)

    # Time lags from -50 to 50 inclusive; range() handles negative starts.
    lag = range(-50, 51)

    columns = {}
    # Sort the listing so the column order of the output is reproducible.
    for name in sorted(os.listdir(folder)):
        if not name.endswith(".txt"):
            continue
        print(name)
        # os.listdir returns bare names, so the folder must be joined back on.
        # r"\s+" accepts one or more spaces between the two columns.
        table = pd.read_csv(os.path.join(folder, name), sep=r"\s+", header=None)
        columns[os.path.splitext(name)[0]] = table.iloc[:, 1]

    df = pd.DataFrame(columns)
    # Align to exactly one row per lag value: shorter files are padded with
    # NaN and longer ones are cut, so assigning the index below cannot fail.
    df = df.reindex(range(len(lag)))
    df.index = pd.Index(lag, name='lag')

    # Save the combined table to the fixed output location.
    df.to_csv(r'D:\GLaDOS-CAMPUS\data\TestData-AB\ExtractedABFiles.csv')

    # Dialogue box shown after the CSV has been written.
    mbox = QMessageBox()
    mbox.setText("Hopefully this worked!")
    mbox.setDetailedText("")
    mbox.setStandardButtons(QMessageBox.Ok)
    mbox.setWindowTitle('CSV Batch Processor')
    mbox.exec_()
试试这个:
import os
from pathlib import Path

import pandas as pd

# Time lags from -50 to 50 inclusive; range() handles negative starts.
lag = list(range(-50, 51))
df1 = pd.DataFrame(lag)

source_dir = r"C:\Users\Wilian\Documents\DPROF"
# Sort the listing so the column order of the output is reproducible.
for file in sorted(os.listdir(source_dir)):
    if file.endswith(".txt"):
        # os.listdir returns bare names, so join the directory back on.
        # The files are space-separated and have no header row; without
        # header=None the first data row would be consumed as column names
        # and every value would shift by one against the lag column.
        df2 = pd.read_csv(os.path.join(source_dir, file), sep=r"\s+", header=None)
        # Keep only the second column, labelled with the file name sans extension.
        df1[Path(file).stem] = df2.iloc[:, 1]

# Column 0 holds the lag values; make it the index of the result.
df1.set_index(0, inplace=True)
df1.to_csv(r'D:\GLaDOS-CAMPUS\data\TestData-AB\ExtractedABFiles.csv')