Python:在列出文件并从列表中提取数据时,如何使用多进程(multiprocessing)?

python multiprocessing while listing the files and extract data from list?

我在一个目录中有多个文件。我想从每个文件中提取数据,并希望借助 Python 的 multiprocessing 来完成这项任务。在下面的代码中,如果 'filelist' 只包含一个元素,'my_result' 会给出 'np_array_list' 的值;但当 'filelist' 包含多个元素时,'my_result' 得不到任何结果。是 'filelist' 还是 'filename_arg' 出了问题?谁能提出解决方案?

import numpy as np
import multiprocessing as mp
# Raw string: in an ordinary literal the "\U" of "\Users" starts a unicode
# escape, which Python 3 rejects with a SyntaxError before anything runs.
path = r'C:\Users\sys\PycharmProjects\MPtest\*.gwf'
# Keep only the file names; the directory part is dropped here.
# NOTE(review): bare names only resolve when the working directory is the
# data directory - confirm, or hand the full paths to the reader instead.
filenames = [os.path.basename(x) for x in glob.glob(path)]
# Order the files numerically by the first run of digits in each name
# (a name without any digit raises IndexError here).
filelist = sorted(filenames, key=lambda x: float(re.findall(r"(\d+)", x)[0]))
# Channel names VALUE_201 ... VALUE_234; range() yields the same 34 integers
# as the former np.linspace(201, 234, 34) without the float round-trip.
channelslist = ["VALUE_" + str(n) for n in range(201, 235)]
rows = 500000  # presumably the number of samples per channel - TODO confirm
cols = len(channelslist)  # No. of channels involved in the measurement
# Module-level buffer and result list used by myfunc below.
sensdataarray = np.zeros((rows, cols))
np_array_list = []

def myfunc(filename_arg):
    """Fill the module-level array from one file and return the module-level list.

    For every name in `channelslist`, the matching column of the module-level
    `sensdataarray` is overwritten with data read from `filename_arg`. The
    array is then appended to the module-level `np_array_list`, and that list
    is returned.

    NOTE(review): no new array is created per call. Every call overwrites the
    same `sensdataarray`, appends that same object to the list again, and
    returns the whole list accumulated so far rather than only this file's
    data. When called through a process pool, each worker process presumably
    holds its own copy of these globals - confirm against the multiprocessing
    documentation.
    """
    for inumber, iname in enumerate(channelslist):
        # Column `inumber` receives element 0 of what frgetvect returns for
        # this channel (presumably the sample vector - confirm in framel docs).
        sensdataarray[:, inumber] = framel.frgetvect(filename_arg, iname, verbose=False)[0]
    # Appends a reference to the shared array, not a copy of its contents.
    np_array_list.append(sensdataarray)
    return np_array_list
# MP
if __name__ == '__main__':
    # Report the CPU count inside the guard: worker processes that re-import
    # this module (the "spawn" start method) would otherwise print it again.
    print("no of CPUs:", mp.cpu_count())
    pool = mp.Pool()
    try:
        my_result = pool.map(myfunc, filelist)
        # when 'filelist' contains a single element, 'my_result' gives the value of 'np_array_list',
        # but when 'filelist' contains multiple elements, 'my_result' does not give any result!!
    finally:
        # Close and join even when map() raises, so that no worker process
        # is left running in the background.
        pool.close()
        pool.join()
    print('My result is :', my_result)

请在函数内部初始化 sensdataarray 并将其 return。这样每次调用都会得到一个独立的新数组,pool.map 返回的列表中每个文件对应一个数组,而不必再依赖模块级的 sensdataarray 和 np_array_list。

...
import numpy as np
import multiprocessing as mp
# Raw string: in an ordinary literal the "\U" of "\Users" starts a unicode
# escape, which Python 3 rejects with a SyntaxError before anything runs.
path = r'C:\Users\sys\PycharmProjects\MPtest\*.gwf'
# Keep only the file names; the directory part is dropped here.
# NOTE(review): bare names only resolve when the working directory is the
# data directory - confirm, or hand the full paths to the reader instead.
filenames = [os.path.basename(x) for x in glob.glob(path)]
# Order the files numerically by the first run of digits in each name
# (a name without any digit raises IndexError here).
filelist = sorted(filenames, key=lambda x: float(re.findall(r"(\d+)", x)[0]))
# Channel names VALUE_201 ... VALUE_234; range() yields the same 34 integers
# as the former np.linspace(201, 234, 34) without the float round-trip.
channelslist = ["VALUE_" + str(n) for n in range(201, 235)]
rows = 500000  # presumably the number of samples per channel - TODO confirm
cols = len(channelslist)  # No. of channels involved in the measurement

def myfunc(filename_arg):
    """Read every channel of one file into a freshly allocated array.

    A new zero-filled array of shape (rows, cols) is created on each call, so
    nothing is shared between calls. Column i is filled from the i-th name in
    the module-level `channelslist`.

    Args:
        filename_arg: File name handed to `framel.frgetvect`.

    Returns:
        The newly created array for this file.
    """
    result = np.zeros((rows, cols))
    for column, channel in enumerate(channelslist):
        frame = framel.frgetvect(filename_arg, channel, verbose=False)
        # Only element 0 of the frgetvect result is stored
        # (presumably the sample vector - confirm in the framel docs).
        result[:, column] = frame[0]
    return result

# MP
if __name__ == '__main__':
    # Report the CPU count inside the guard: worker processes that re-import
    # this module (the "spawn" start method) would otherwise print it again.
    print("no of CPUs:", mp.cpu_count())
    pool = mp.Pool()
    try:
        # pool.map returns one myfunc result per entry of 'filelist',
        # in the same order as 'filelist'.
        my_result = pool.map(myfunc, filelist)
    finally:
        # Close and join even when map() raises, so that no worker process
        # is left running in the background.
        pool.close()
        pool.join()
    print('My result is :', my_result)

此外,如果您的脚本出错,子进程可能不会被 join,而是残留在后台。请使用 try/finally 子句来保证进程池一定会被 join;或者,您也可以使用 parmap 模块:

import glob
import os
import re

import framel
import parmap
import numpy as np

def myfunc(filename_arg, rows, channelslist):
    """Read the requested channels of one file into a freshly allocated array.

    Args:
        filename_arg: File name handed to `framel.frgetvect`.
        rows: Number of rows of the returned array.
        channelslist: Channel names; one output column per name, in order.

    Returns:
        A new array of shape (rows, len(channelslist)). It stays all zeros
        when `channelslist` is empty, because nothing is read in that case.
    """
    data = np.zeros((rows, len(channelslist)))
    for column, channel in enumerate(channelslist):
        frame = framel.frgetvect(filename_arg, channel, verbose=False)
        # Only element 0 of the frgetvect result is stored
        # (presumably the sample vector - confirm in the framel docs).
        data[:, column] = frame[0]
    return data


if __name__ == '__main__':
    # Raw string: in an ordinary literal the "\U" of "\Users" starts a unicode
    # escape, which Python 3 rejects with a SyntaxError before anything runs.
    path = r'C:\Users\sys\PycharmProjects\MPtest\*.gwf'
    # Keep only the file names; the directory part is dropped here.
    # NOTE(review): bare names only resolve when the working directory is the
    # data directory - confirm, or hand the full paths to myfunc instead.
    filenames = [os.path.basename(x) for x in glob.glob(path)]
    # Order the files numerically by the first run of digits in each name
    # (a name without any digit raises IndexError here).
    filelist = sorted(filenames, key=lambda x: float(re.findall(r"(\d+)", x)[0]))
    # Channel names VALUE_201 ... VALUE_234; range() yields the same 34
    # integers as the former np.linspace(201, 234, 34) without the float
    # round-trip.
    channelslist = ["VALUE_" + str(n) for n in range(201, 235)]
    # rows and channelslist are given as keyword arguments; presumably parmap
    # forwards them unchanged to every myfunc call - confirm in parmap docs.
    my_result = parmap.map(myfunc, filelist, rows=500000, channelslist=channelslist)
    print('My result is :', my_result)