使用 matplotlib 的内存泄漏
memory leaks using matplotlib
这不是一个错误报告——即使这些泄漏可能是 mpl 错误的结果,请解释问题并寻求解决方法。
问题很简单:绘制大量数据(使用 plot() 或 scatter()),clear/release 一切,垃圾收集,但仍未释放几乎所有内存。
Line # Mem usage Increment Line Contents
================================================
391 122.312 MiB 0.000 MiB @profile
392 def plot_network_scatterplot(t_sim_stop, spikes_mat, n_cells_per_area, n_cells, basedir_output, condition_idx):
393
394 # make network scatterplot
395 122.312 MiB 0.000 MiB w, h = plt.figaspect(.1/(t_sim_stop/1E3))
396 122.324 MiB 0.012 MiB fig = mpl.figure.Figure(figsize=(10*w, 10*h))
397 122.328 MiB 0.004 MiB canvas = FigureCanvas(fig)
398 122.879 MiB 0.551 MiB ax = fig.add_axes([.01, .1, .98, .8])
399 134.879 MiB 12.000 MiB edgecolor_vec = np.array([(1., 0., 0.), (0., 0., 1.)])[1-((spikes_mat[:,3]+1)/2).astype(np.int)]
400 '''pathcoll = ax.scatter(spikes_mat[:,1],
401 spikes_mat[:,0] + n_cells_per_area * (spikes_mat[:,2]-1),
402 s=.5,
403 c=spikes_mat[:,3],
404 edgecolor=edgecolor_vec)'''
405 440.098 MiB 305.219 MiB pathcoll = ax.plot(np.random.rand(10000000), np.random.rand(10000000))
406 440.098 MiB 0.000 MiB ax.set_xlim([0., t_sim_stop])
407 440.098 MiB 0.000 MiB ax.set_ylim([1, n_cells])
408 440.098 MiB 0.000 MiB plt.xlabel('Time [ms]')
409 440.098 MiB 0.000 MiB plt.ylabel('Cell ID')
410 440.098 MiB 0.000 MiB plt.suptitle('Network activity scatterplot')
411 #plt.savefig(os.path.join(basedir_output, 'network_scatterplot-[cond=' + str(condition_idx) + '].png'))
412 931.898 MiB 491.801 MiB canvas.print_figure(os.path.join(basedir_output, 'network_scatterplot-[cond=' + str(condition_idx) + '].png'))
413 #fig.canvas.close()
414 #pathcoll.set_offsets([])
415 #pathcoll.remove()
416 931.898 MiB 0.000 MiB ax.cla()
417 931.898 MiB 0.000 MiB ax.clear()
418 931.898 MiB 0.000 MiB fig.clf()
419 931.898 MiB 0.000 MiB fig.clear()
420 931.898 MiB 0.000 MiB plt.clf()
421 932.352 MiB 0.453 MiB plt.cla()
422 932.352 MiB 0.000 MiB plt.close(fig)
423 932.352 MiB 0.000 MiB plt.close()
424 932.352 MiB 0.000 MiB del fig
425 932.352 MiB 0.000 MiB del ax
426 932.352 MiB 0.000 MiB del pathcoll
427 932.352 MiB 0.000 MiB del edgecolor_vec
428 932.352 MiB 0.000 MiB del canvas
429 505.094 MiB -427.258 MiB gc.collect()
430 505.094 MiB 0.000 MiB plt.close('all')
431 505.094 MiB 0.000 MiB gc.collect()
我尝试了所有 clear/release 的多种组合和不同顺序,但都无济于事。我试过不使用明确的 fig/canvas 创建,而只是使用 mpl.pyplot,结果相同。
有没有任何方法来释放这个内存,然后用我进来的122.312出去?
干杯!
Alex Martelli explains
It's very hard, in general, for a process to "give memory back to the OS"
(until the process terminates and the OS gets back all the memory, of course)
because (in most implementation) what malloc returns is carved out of big blocks
for efficiency, but the whole block can't be given back if any part of it is
still in use." So what you think is a memory leak may just be a side effect of
this. If so, fork can solve the problem.
Furthermore、
The only really reliable way to ensure that a large but
temporary use of memory DOES return all resources to the system when it's done,
is to have that use happen in a subprocess, which does the memory-hungry work
then terminates."
因此,您可以使用 multiprocessing
到 运行 [=12,而不是尝试清除图形和轴、删除引用和垃圾收集(所有这些都不起作用) =] 在单独的进程中:
import multiprocessing as mp
def plot_network_scatterplot(
t_sim_stop, spikes_mat, n_cells_per_area, n_cells, basedir_output,
condition_idx):
# make network scatterplot
w, h = plt.figaspect(.1/(t_sim_stop/1E3))
fig = mpl.figure.Figure(figsize=(10*w, 10*h))
canvas = FigureCanvas(fig)
ax = fig.add_axes([.01, .1, .98, .8])
edgecolor_vec = np.array([(1., 0., 0.), (0., 0., 1.)])[1-((spikes_mat[:,3]+1)/2).astype(np.int)]
'''pathcoll = ax.scatter(spikes_mat[:,1],
spikes_mat[:,0] + n_cells_per_area * (spikes_mat[:,2]-1),
s=.5,
c=spikes_mat[:,3],
edgecolor=edgecolor_vec)'''
pathcoll = ax.plot(np.random.rand(10000000), np.random.rand(10000000))
ax.set_xlim([0., t_sim_stop])
ax.set_ylim([1, n_cells])
plt.xlabel('Time [ms]')
plt.ylabel('Cell ID')
plt.suptitle('Network activity scatterplot')
canvas.print_figure(os.path.join(basedir_output, 'network_scatterplot-[cond=' + str(condition_idx) + '].png'))
def spawn(func, *args):
proc = mp.Process(target=func, args=args)
proc.start()
# wait until proc terminates.
proc.join()
if __name__ == '__main__':
spawn(plot_network_scatterplot, t_sim_stop, spikes_mat, n_cells_per_area,
n_cells, basedir_output, condition_idx)
这不是一个错误报告——即使这些泄漏可能是 mpl 错误的结果,请解释问题并寻求解决方法。
问题很简单:绘制大量数据(使用 plot() 或 scatter()),clear/release 一切,垃圾收集,但仍未释放几乎所有内存。
Line # Mem usage Increment Line Contents
================================================
391 122.312 MiB 0.000 MiB @profile
392 def plot_network_scatterplot(t_sim_stop, spikes_mat, n_cells_per_area, n_cells, basedir_output, condition_idx):
393
394 # make network scatterplot
395 122.312 MiB 0.000 MiB w, h = plt.figaspect(.1/(t_sim_stop/1E3))
396 122.324 MiB 0.012 MiB fig = mpl.figure.Figure(figsize=(10*w, 10*h))
397 122.328 MiB 0.004 MiB canvas = FigureCanvas(fig)
398 122.879 MiB 0.551 MiB ax = fig.add_axes([.01, .1, .98, .8])
399 134.879 MiB 12.000 MiB edgecolor_vec = np.array([(1., 0., 0.), (0., 0., 1.)])[1-((spikes_mat[:,3]+1)/2).astype(np.int)]
400 '''pathcoll = ax.scatter(spikes_mat[:,1],
401 spikes_mat[:,0] + n_cells_per_area * (spikes_mat[:,2]-1),
402 s=.5,
403 c=spikes_mat[:,3],
404 edgecolor=edgecolor_vec)'''
405 440.098 MiB 305.219 MiB pathcoll = ax.plot(np.random.rand(10000000), np.random.rand(10000000))
406 440.098 MiB 0.000 MiB ax.set_xlim([0., t_sim_stop])
407 440.098 MiB 0.000 MiB ax.set_ylim([1, n_cells])
408 440.098 MiB 0.000 MiB plt.xlabel('Time [ms]')
409 440.098 MiB 0.000 MiB plt.ylabel('Cell ID')
410 440.098 MiB 0.000 MiB plt.suptitle('Network activity scatterplot')
411 #plt.savefig(os.path.join(basedir_output, 'network_scatterplot-[cond=' + str(condition_idx) + '].png'))
412 931.898 MiB 491.801 MiB canvas.print_figure(os.path.join(basedir_output, 'network_scatterplot-[cond=' + str(condition_idx) + '].png'))
413 #fig.canvas.close()
414 #pathcoll.set_offsets([])
415 #pathcoll.remove()
416 931.898 MiB 0.000 MiB ax.cla()
417 931.898 MiB 0.000 MiB ax.clear()
418 931.898 MiB 0.000 MiB fig.clf()
419 931.898 MiB 0.000 MiB fig.clear()
420 931.898 MiB 0.000 MiB plt.clf()
421 932.352 MiB 0.453 MiB plt.cla()
422 932.352 MiB 0.000 MiB plt.close(fig)
423 932.352 MiB 0.000 MiB plt.close()
424 932.352 MiB 0.000 MiB del fig
425 932.352 MiB 0.000 MiB del ax
426 932.352 MiB 0.000 MiB del pathcoll
427 932.352 MiB 0.000 MiB del edgecolor_vec
428 932.352 MiB 0.000 MiB del canvas
429 505.094 MiB -427.258 MiB gc.collect()
430 505.094 MiB 0.000 MiB plt.close('all')
431 505.094 MiB 0.000 MiB gc.collect()
我尝试了所有 clear/release 的多种组合和不同顺序,但都无济于事。我试过不使用明确的 fig/canvas 创建,而只是使用 mpl.pyplot,结果相同。
有没有任何方法来释放这个内存,然后用我进来的122.312出去?
干杯!
Alex Martelli explains
It's very hard, in general, for a process to "give memory back to the OS" (until the process terminates and the OS gets back all the memory, of course) because (in most implementation) what malloc returns is carved out of big blocks for efficiency, but the whole block can't be given back if any part of it is still in use." So what you think is a memory leak may just be a side effect of this. If so, fork can solve the problem.
Furthermore、
The only really reliable way to ensure that a large but temporary use of memory DOES return all resources to the system when it's done, is to have that use happen in a subprocess, which does the memory-hungry work then terminates."
因此,您可以使用 multiprocessing
到 运行 [=12,而不是尝试清除图形和轴、删除引用和垃圾收集(所有这些都不起作用) =] 在单独的进程中:
import multiprocessing as mp
def plot_network_scatterplot(
t_sim_stop, spikes_mat, n_cells_per_area, n_cells, basedir_output,
condition_idx):
# make network scatterplot
w, h = plt.figaspect(.1/(t_sim_stop/1E3))
fig = mpl.figure.Figure(figsize=(10*w, 10*h))
canvas = FigureCanvas(fig)
ax = fig.add_axes([.01, .1, .98, .8])
edgecolor_vec = np.array([(1., 0., 0.), (0., 0., 1.)])[1-((spikes_mat[:,3]+1)/2).astype(np.int)]
'''pathcoll = ax.scatter(spikes_mat[:,1],
spikes_mat[:,0] + n_cells_per_area * (spikes_mat[:,2]-1),
s=.5,
c=spikes_mat[:,3],
edgecolor=edgecolor_vec)'''
pathcoll = ax.plot(np.random.rand(10000000), np.random.rand(10000000))
ax.set_xlim([0., t_sim_stop])
ax.set_ylim([1, n_cells])
plt.xlabel('Time [ms]')
plt.ylabel('Cell ID')
plt.suptitle('Network activity scatterplot')
canvas.print_figure(os.path.join(basedir_output, 'network_scatterplot-[cond=' + str(condition_idx) + '].png'))
def spawn(func, *args):
proc = mp.Process(target=func, args=args)
proc.start()
# wait until proc terminates.
proc.join()
if __name__ == '__main__':
spawn(plot_network_scatterplot, t_sim_stop, spikes_mat, n_cells_per_area,
n_cells, basedir_output, condition_idx)