从 .txt 文件中提取以空格分隔的列,并保存到新的数据框中

Extract space-separated columns from .txt files and save them in a new dataframe

我是 Python 的新手,我正在尝试创建一个脚本,循环遍历文件夹并获取所有包含 2 列数据的 .txt 文件,这些数据仅由空格分隔。然后我只想从这些 .txt 文件中取出第二列,并将它们保存到一个新的数据框中,其中 'lag' 作为索引,文件名作为列标题(header)。我有点卡住了,因为我似乎无法比打印文件名更进一步,仅此而已。任何帮助将不胜感激。(PS:对令人尴尬的 -50 到 50 那几行表示歉意——我知道有更有效的方法,但找不到可以处理负值的方法。)提前致谢。

    def changeFolder(self):
        """Ask for a folder and merge the second column of its .txt files into one CSV.

        Every ``.txt`` file in the chosen folder is read as whitespace-separated
        data without a header row. The second column of each file becomes one
        column of the output table, named after the file (extension removed),
        and the table is indexed by the time lag running from -50 to 50.

        Returns early without doing anything when the dialog is cancelled.

        Raises:
            ValueError: if a file's second column does not have exactly one
                value per lag (101 rows).
        """
        # Kept as function-scope imports, as in the original snippet.
        import os
        import pandas as pd

        folder = QFileDialog.getExistingDirectory(None, 'Project Data', '.csv files')
        # A cancelled dialog yields an empty string, not None, so test truthiness.
        if not folder:
            return
        print(folder)

        # Time-lag axis from -50 to 50 inclusive; range() copes with negative bounds.
        lag = list(range(-50, 51))

        # Map "file name without extension" -> values of that file's second column.
        columns = {}
        # Sorted so the column order does not depend on the directory listing order.
        for name in sorted(os.listdir(folder)):
            if not name.endswith(".txt"):
                continue
            print(name)
            # os.listdir returns bare names; join with the folder to get a usable path.
            table = pd.read_csv(os.path.join(folder, name), sep=r"\s+", header=None)
            values = table.iloc[:, 1].to_numpy()
            if len(values) != len(lag):
                raise ValueError(
                    f"{name}: expected {len(lag)} rows, found {len(values)}"
                )
            columns[os.path.splitext(name)[0]] = values

        # One column per file, with the time lag as a named index.
        df = pd.DataFrame(columns, index=pd.Index(lag, name='lag'))
        # Save the merged table as .csv at the fixed output path below.
        df.to_csv(r'D:\GLaDOS-CAMPUS\data\TestData-AB\ExtractedABFiles.csv')

        # Dialog box shown once the CSV has been written.
        mbox = QMessageBox()
        mbox.setText("Hopefully this worked!")
        mbox.setDetailedText("")
        mbox.setStandardButtons(QMessageBox.Ok)
        mbox.setWindowTitle('CSV Batch Processor')
        mbox.exec_()

试试这个:

# Merge the second column of every .txt file in a folder into a single CSV:
# one column per file (named after the file stem), indexed by the time lag.
import os
from pathlib import Path

import pandas as pd

# Time lags from -50 to 50 inclusive; range() handles negative bounds directly.
lag = list(range(-50, 51))
source_dir = r"C:\Users\Wilian\Documents\DPROF"

# Column 0 temporarily holds the lag values. It is turned into the index only
# at the end, so each file's column is aligned row-by-row on the default
# 0..N index while the table is being filled.
df1 = pd.DataFrame(lag)
# Sorted so the column order does not depend on the directory listing order.
for file in sorted(os.listdir(source_dir)):
    if file.endswith(".txt"):
        # os.listdir returns bare names, so join with the folder; the files are
        # whitespace-separated and have no header row.
        df2 = pd.read_csv(os.path.join(source_dir, file), sep=r"\s+", header=None)
        # Files shorter than the lag axis leave NaN in the missing rows;
        # extra rows beyond it are dropped by the index alignment.
        df1[Path(file).stem] = df2.iloc[:, 1]

# Use the lag values as the index and label it 'lag' in the output file.
df1 = df1.set_index(0).rename_axis("lag")
df1.to_csv(r'D:\GLaDOS-CAMPUS\data\TestData-AB\ExtractedABFiles.csv')