从目录加载多个 csv 文件并将列存储在数组中
Loading multiple csv files from a directory and storing the columns in an array
我有一个目录,其中包含多个以类似模式命名的 csv 文件,例如:'1000 x 30.csv'、'1000 y 30.csv' 或 '1111 z 60.csv' 等。我的 csv 文件包含 x 轴和 y 轴值这 2 列,我想将它们分别存储在一个数组中。
我想输入如下输入:1000 x 30,以便程序获取 (1000 x 30.csv) 文件的列并存储在数组中。我有一个代码,当我输入特定文件的路径时运行,我想遍历目录并在输入文件名时给我数组值。任何建议都会对我有帮助。
import csv
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
from scipy import asarray as ar,exp
import lmfit
import glob
# reading the x/y/z values from the respective csv files
xData = []  # first column of every numeric row
yData = []  # second column of every numeric row
# NOTE(review): there is no path separator between "CSV_FILES_NV_LAB" and
# "11 x 30.csv" -- confirm whether one was lost when the path was pasted.
path = r'C:\Users\angel\OneDrive\Documents\CSV_FILES_NV_LAB11 x 30.csv'
# newline="" is how the csv module asks for files to be opened
with open(path, "r", newline="") as f_in:
    reader = csv.reader(f_in)
    # skip the header row; the default keeps an empty file from raising
    next(reader, None)
    for line in reader:
        try:
            float_1, float_2 = float(line[0]), float(line[1])
        except (ValueError, IndexError):
            # ignore rows that are blank, too short or not numeric
            continue
        xData.append(float_1)
        yData.append(float_2)
我认为下面的解决方案应该可以帮助您入门。我在需要的地方为代码添加了注释,并指向了几个 Stack Overflow 上的相关问答。
请注意,请为您的下一个问题提供一些经过修剪和净化的示例输入文件。我不得不猜测一下确切的输入是什么。 记住,你的问题越好,你的答案就越好。
输入文件(由 keyboard mashing,即随手乱敲键盘生成):
path/to/csv/files/1111 x 30.csv
x,y
156414.4189,84181.46
16989.177,61619.4698974
path/to/csv/files/11 z 205.csv
x,z
3.123123,56.1231
123.6546,645767.654
65465.4561989,97946.56169
实际代码:
main.py
import os
import csv
def get_files_from_path(path: str) -> list:
    """Return the paths of all CSV files found under *path*.

    The directory tree is walked with ``os.walk``, so files in
    sub-directories are included as well.  Matching on the ``.csv``
    extension is case-insensitive.

    Args:
        path: Directory to search.

    Returns:
        A list of file paths, each made of the directory the file was found
        in joined with the file name.  The list is empty when nothing
        matches, including when *path* does not exist (``os.walk`` ignores
        listing errors by default).
    """
    result = []
    for subdir, _dirs, files in os.walk(path):
        for filename in files:
            # only return .csv files
            if filename.lower().endswith('.csv'):
                # os.path.join does not double the separator when the
                # directory already ends with one
                result.append(os.path.join(subdir, filename))
    return result
def load_csv(filename: str) -> list:
    """Load a two-column CSV file and return its rows as a list of dicts.

    The first row is treated as the header; its first two cells become the
    dict keys (for example ``x``/``y`` or ``x``/``z``).  Every following row
    contributes one ``{column0: float, column1: float}`` item built from its
    first two cells.  Blank rows are skipped silently; rows that are too
    short or not numeric are reported on stdout and skipped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        One dict per usable data row, in file order.  An empty file yields
        an empty list.

    Raises:
        ValueError: If the header row has fewer than two columns.
        OSError: If the file cannot be opened.
    """
    result = []
    # newline='' is how the csv module asks for files to be opened, so that
    # line endings inside quoted fields are handled by the reader itself
    with open(filename, newline='') as infile:
        reader = csv.reader(infile)
        # get the column names; the default avoids StopIteration on an
        # empty file
        header = next(reader, None)
        if header is None:
            return result
        # only the first two columns are used, extra header cells are ignored
        column0, column1 = header[:2]
        for line in reader:
            if not line:
                # blank line in the file, nothing to report
                continue
            try:
                result.append({column0: float(line[0]),
                               column1: float(line[1])})
            except (ValueError, IndexError) as e:
                # report the bad row with its location instead of hiding it
                print('{}:{}: skipping row {!r}: {}'.format(
                    filename, reader.line_num, line, e))
    return result
def load_all(path: str) -> dict:
    """Load every CSV file found under *path* into a dict.

    Args:
        path: Directory to search; it is handed to ``get_files_from_path``.

    Returns:
        A dict mapping each file's base name without its extension
        (for example ``'1111 x 30'``) to whatever ``load_csv`` returns for
        that file.  If two files share a base name, the one loaded later
        replaces the earlier entry.
    """
    result = {}
    for filename in get_files_from_path(path):
        # use the file name without directory and extension as the key;
        # splitext removes the real extension instead of assuming that it
        # is exactly four characters long
        keyname, _ext = os.path.splitext(os.path.basename(filename))
        result[keyname] = load_csv(filename)
    return result
from pprint import pprint

# "all" is a Python builtin, so the loaded data gets its own name instead of
# shadowing it for the rest of the module
all_data = load_all('path/to/csv/files')
pprint(all_data)
print('\n--------------------\n')
# a single file is looked up by its name without the .csv extension
pprint(all_data['11 z 205'])
产出
{'11 z 205': [{'x': 3.123123, 'z': 56.1231},
{'x': 123.6546, 'z': 645767.654},
{'x': 65465.4561989, 'z': 97946.56169}],
'1111 x 30': [{'x': 156414.4189, 'y': 84181.46},
{'x': 16989.177, 'y': 61619.4698974}]}
--------------------
[{'x': 3.123123, 'z': 56.1231},
{'x': 123.6546, 'z': 645767.654},
{'x': 65465.4561989, 'z': 97946.56169}]
我有一个目录,其中包含多个以类似模式命名的 csv 文件,例如:'1000 x 30.csv'、'1000 y 30.csv' 或 '1111 z 60.csv' 等。我的 csv 文件包含 x 轴和 y 轴值这 2 列,我想将它们分别存储在一个数组中。 我想输入如下输入:1000 x 30,以便程序获取 (1000 x 30.csv) 文件的列并存储在数组中。我有一个代码,当我输入特定文件的路径时运行,我想遍历目录并在输入文件名时给我数组值。任何建议都会对我有帮助。
import csv
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
from scipy import asarray as ar,exp
import lmfit
import glob
# reading the x/y/z values from the respective csv files
xData = []  # first column of every numeric row
yData = []  # second column of every numeric row
# NOTE(review): there is no path separator between "CSV_FILES_NV_LAB" and
# "11 x 30.csv" -- confirm whether one was lost when the path was pasted.
path = r'C:\Users\angel\OneDrive\Documents\CSV_FILES_NV_LAB11 x 30.csv'
# newline="" is how the csv module asks for files to be opened
with open(path, "r", newline="") as f_in:
    reader = csv.reader(f_in)
    # skip the header row; the default keeps an empty file from raising
    next(reader, None)
    for line in reader:
        try:
            float_1, float_2 = float(line[0]), float(line[1])
        except (ValueError, IndexError):
            # ignore rows that are blank, too short or not numeric
            continue
        xData.append(float_1)
        yData.append(float_2)
我认为下面的解决方案应该可以帮助您入门。我在需要的地方为代码添加了注释,并指向了几个 Stack Overflow 上的相关问答。
请注意,请为您的下一个问题提供一些经过修剪和净化的示例输入文件。我不得不猜测一下确切的输入是什么。 记住,你的问题越好,你的答案就越好。
输入文件(由 keyboard mashing,即随手乱敲键盘生成):
path/to/csv/files/1111 x 30.csv
x,y
156414.4189,84181.46
16989.177,61619.4698974
path/to/csv/files/11 z 205.csv
x,z
3.123123,56.1231
123.6546,645767.654
65465.4561989,97946.56169
实际代码:
main.py
import os
import csv
def get_files_from_path(path: str) -> list:
    """Return the paths of all CSV files found under *path*.

    The directory tree is walked with ``os.walk``, so files in
    sub-directories are included as well.  Matching on the ``.csv``
    extension is case-insensitive.

    Args:
        path: Directory to search.

    Returns:
        A list of file paths, each made of the directory the file was found
        in joined with the file name.  The list is empty when nothing
        matches, including when *path* does not exist (``os.walk`` ignores
        listing errors by default).
    """
    result = []
    for subdir, _dirs, files in os.walk(path):
        for filename in files:
            # only return .csv files
            if filename.lower().endswith('.csv'):
                # os.path.join does not double the separator when the
                # directory already ends with one
                result.append(os.path.join(subdir, filename))
    return result
def load_csv(filename: str) -> list:
    """Load a two-column CSV file and return its rows as a list of dicts.

    The first row is treated as the header; its first two cells become the
    dict keys (for example ``x``/``y`` or ``x``/``z``).  Every following row
    contributes one ``{column0: float, column1: float}`` item built from its
    first two cells.  Blank rows are skipped silently; rows that are too
    short or not numeric are reported on stdout and skipped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        One dict per usable data row, in file order.  An empty file yields
        an empty list.

    Raises:
        ValueError: If the header row has fewer than two columns.
        OSError: If the file cannot be opened.
    """
    result = []
    # newline='' is how the csv module asks for files to be opened, so that
    # line endings inside quoted fields are handled by the reader itself
    with open(filename, newline='') as infile:
        reader = csv.reader(infile)
        # get the column names; the default avoids StopIteration on an
        # empty file
        header = next(reader, None)
        if header is None:
            return result
        # only the first two columns are used, extra header cells are ignored
        column0, column1 = header[:2]
        for line in reader:
            if not line:
                # blank line in the file, nothing to report
                continue
            try:
                result.append({column0: float(line[0]),
                               column1: float(line[1])})
            except (ValueError, IndexError) as e:
                # report the bad row with its location instead of hiding it
                print('{}:{}: skipping row {!r}: {}'.format(
                    filename, reader.line_num, line, e))
    return result
def load_all(path: str) -> dict:
    """Load every CSV file found under *path* into a dict.

    Args:
        path: Directory to search; it is handed to ``get_files_from_path``.

    Returns:
        A dict mapping each file's base name without its extension
        (for example ``'1111 x 30'``) to whatever ``load_csv`` returns for
        that file.  If two files share a base name, the one loaded later
        replaces the earlier entry.
    """
    result = {}
    for filename in get_files_from_path(path):
        # use the file name without directory and extension as the key;
        # splitext removes the real extension instead of assuming that it
        # is exactly four characters long
        keyname, _ext = os.path.splitext(os.path.basename(filename))
        result[keyname] = load_csv(filename)
    return result
from pprint import pprint

# "all" is a Python builtin, so the loaded data gets its own name instead of
# shadowing it for the rest of the module
all_data = load_all('path/to/csv/files')
pprint(all_data)
print('\n--------------------\n')
# a single file is looked up by its name without the .csv extension
pprint(all_data['11 z 205'])
产出
{'11 z 205': [{'x': 3.123123, 'z': 56.1231},
{'x': 123.6546, 'z': 645767.654},
{'x': 65465.4561989, 'z': 97946.56169}],
'1111 x 30': [{'x': 156414.4189, 'y': 84181.46},
{'x': 16989.177, 'y': 61619.4698974}]}
--------------------
[{'x': 3.123123, 'z': 56.1231},
{'x': 123.6546, 'z': 645767.654},
{'x': 65465.4561989, 'z': 97946.56169}]