写入 csv 文件 - python 3.7

Writing to csv file - python 3.7

(原帖此处附有三张图片。)我有一些 csv 文件,文件名形如 filename1.in.csv 和 filename1.out.csv、filename2.in.csv 和 filename2.out.csv,它们分布在文件夹及其子文件夹中。我想分别对 .in.csv 文件和 .out.csv 文件计算一些统计值,最后把所有结果逐行写入同一个带标题行的 csv 文件(在我的代码中称为 OutputFile)。每一行包含输入文件的名称(作为 className)以及计算出的各项统计值。我附上了一个 csv 文件的截图,它就是我用来计算统计值的输入文件(inFile)。但我没有得到想要的输出,程序抛出 NameError,提示以下名称未定义:maxTimeIn, minTimeIn, stdTimeIn, qual1TimeIn, qual2TimeIn, maxLenIn, minLenIn, stdLenIn, qual1LenIn, qua12LenIn, maxTimeOut, minTimeOut, stdTimeOut, qual1TimeOut, qual2TimeOut, maxLenOut, minLenOut, stdLenOut, qual1LenOut, qua12LenOut, className。我是 Python 新手,不确定我的代码能否按要求生成输出,非常感谢任何帮助。谢谢!

import os
import pandas as pd
import csv

# Walk `startdir`, compute summary statistics for every '<name>.in.csv' and
# '<name>.out.csv' file, and write one row per <name> to a single CSV file.
#
# The statistics of the two directions are collected first and written once at
# the end. Building the row while still inside the directory walk fails with a
# NameError, because at that point only one of the two directions has been
# computed, and re-opening the output in 'w' mode per file overwrites it.
startdir = '.'
suffix = '.csv'
output_path = '/root/Desktop/OutputFile.csv'

in_suffix = '.in' + suffix
out_suffix = '.out' + suffix
stat_columns = ('frame.time_delta_displayed', 'frame.len')

header = ['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn',
          'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn', 'q2LenIn',
          'maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut',
          'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut',
          'activity']

# Maps (folder, className) -> {'in': [10 values], 'out': [10 values]}.
# The folder is part of the key so equally named files in different
# sub-folders do not overwrite each other.
stats = {}
for root, dirs, files in os.walk(startdir):
    for name in files:
        if not name.endswith(suffix):
            continue
        if name.endswith(in_suffix):
            direction = 'in'
            className = name[:-len(in_suffix)]
        elif name.endswith(out_suffix):
            direction = 'out'
            className = name[:-len(out_suffix)]
        else:
            # Any other csv file (including a previously written output
            # file) is not an input and is ignored.
            continue

        inFile = os.path.join(root, name)
        data = pd.read_csv(inFile)

        values = []
        for column in stat_columns:
            series = data[column]
            values += [
                series.max(),
                series.min(),
                series.std(),
                series.quantile(0.25),
                series.quantile(0.5),
            ]
        stats.setdefault((root, className), {})[direction] = values

# Cells stay empty when one direction of a pair is missing.
blank = [''] * (len(stat_columns) * 5)

# newline='' is what the csv module requires to avoid blank lines on Windows.
with open(output_path, 'w', newline='') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow(header)
    for (folder, className), pair in sorted(stats.items()):
        writer.writerow(
            pair.get('in', blank) + pair.get('out', blank) + [className]
        )

试试这段代码:我用 pathlib 代替了 os.path,并把统计值的计算重构成一个函数,以便利用 Pandas 的方法:

from pathlib import Path
import pandas as pd


def prepare_values(df):
    """Return summary statistics of the two frame columns as a flat list.

    For 'frame.time_delta_displayed' and then 'frame.len', the list holds the
    maximum, minimum, standard deviation, 0.25 quantile and 0.5 quantile of
    the column, in that order (ten values in total).
    """
    summaries = []
    for column_name in ('frame.time_delta_displayed', 'frame.len'):
        series = df[column_name]
        summaries.extend((
            series.max(),
            series.min(),
            series.std(),
            series.quantile(0.25),
            series.quantile(0.5),
        ))
    return summaries


# Collect the statistics of every '*.in.csv' and '*.out.csv' file below
# `source_dir`, combine them into one row per activity and write the result
# to 'all_data.csv'.
source_dir = Path('stat')

in_cols = ['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn',
           'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn', 'q2LenIn']
out_cols = ['maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut',
            'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut']


def _collect_stats(directory, file_suffix, cols):
    """Return one dict per file under `directory` ending in `file_suffix`.

    Each dict maps 'activity' to the file name without `file_suffix`, and
    every name in `cols` to the matching value from `prepare_values`.
    """
    rows = []
    # Sorted so the row order does not depend on directory listing order.
    for file in sorted(directory.glob('**/*' + file_suffix)):
        # Strip the whole suffix instead of splitting on the first '.', so a
        # name such as 'my.capture.in.csv' keeps 'my.capture' as activity.
        row = {'activity': file.name[:-len(file_suffix)]}
        row.update(zip(cols, prepare_values(pd.read_csv(file))))
        rows.append(row)
    return rows


in_data = _collect_stats(source_dir, '.in.csv', in_cols)
out_data = _collect_stats(source_dir, '.out.csv', out_cols)

# Passing the columns explicitly keeps the 'activity' key column present even
# when no file of one kind was found, so the merge below cannot fail on it.
in_df = pd.DataFrame(in_data, columns=['activity'] + in_cols)
out_df = pd.DataFrame(out_data, columns=['activity'] + out_cols)
all_df = in_df.merge(out_df, on='activity', how='outer')

# Drop rows without any statistic, then mark the remaining gaps (an activity
# with only an .in or only an .out file) with 0.
all_df.dropna(subset=all_df.columns.tolist()[1:], how='all', inplace=True)
all_df.fillna(0, inplace=True)
all_df.to_csv('all_data.csv', index=False)