使用 cProfile 分析一个原本运行正常的 multiprocessing Python 脚本时出错
Error when profiling an otherwise perfectly working multiprocessing python script with cProfile
我编写了一个使用 multiprocessing
的 Python 小脚本(完整代码见下文)。直接运行时一切正常:
$ ./forkiter.py
0
1
2
3
4
sum of x+1: 15
sum of 2*x: 20
sum of x*x: 30
但是当我尝试使用 cProfile
对其进行分析时,我得到以下信息:
$ python3.6 -m cProfile -o forkiter.prof ./forkiter.py
0
1
2
3
4
Traceback (most recent call last):
File "/home/bli/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/home/bli/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/bli/lib/python3.6/cProfile.py", line 160, in <module>
main()
File "/home/bli/lib/python3.6/cProfile.py", line 153, in main
runctx(code, globs, None, options.outfile, options.sort)
File "/home/bli/lib/python3.6/cProfile.py", line 20, in runctx
filename, sort)
File "/home/bli/lib/python3.6/profile.py", line 64, in runctx
prof.runctx(statement, globals, locals)
File "/home/bli/lib/python3.6/cProfile.py", line 100, in runctx
exec(cmd, globals, locals)
File "./forkiter.py", line 71, in <module>
exit(main())
File "./forkiter.py", line 67, in main
sum_tuples, results_generator))
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 699, in next
raise value
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/home/bli/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/home/bli/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
这是怎么回事?
这是脚本:
#!/usr/bin/env python3
"""This script tries to work around some limitations of multiprocessing."""
from itertools import repeat, starmap
from multiprocessing import Pool
from functools import reduce
from operator import add
from time import sleep
# Doesn't work because local functions can't be pickled:
# def make_tuple_func(funcs):
# def tuple_func(args_list):
# return tuple(func(args) for func, args in zip(funcs, args_list))
# return tuple_func
#
# test_tuple_func = make_tuple_func((plus_one, double, square))
class FuncApplier(object):
    """Group functions and call them pairwise on a tuple of arguments.

    Instances are callable, so one can be used where a plain function is
    expected. Instance attributes are limited to ``funcs`` by ``__slots__``.
    """

    __slots__ = ("funcs", )

    def __init__(self, funcs):
        """Store *funcs*, a sized collection of one-argument callables."""
        self.funcs = funcs

    def __len__(self):
        """Return the number of grouped functions."""
        return len(self.funcs)

    def __call__(self, args_list):
        """Return ``(funcs[0](args_list[0]), funcs[1](args_list[1]), ...)``.

        Functions and arguments are paired with ``zip``, so any surplus on
        either side is silently ignored.
        """
        return tuple(func(args) for func, args in zip(self.funcs, args_list))

    def fork_args(self, args_list):
        """Return a tuple holding *args_list* once per grouped function."""
        return tuple(repeat(args_list, len(self)))
def sum_tuples(*tuples):
    """Return the element-wise sum of the given tuples.

    Items at the same position are combined with ``operator.add``, which
    takes exactly two operands, so this works for two tuples (the shape
    needed by ``functools.reduce``). Calling it with no tuples returns an
    empty tuple.
    """
    return tuple(starmap(add, zip(*tuples)))
# Can't define these functions in main:
# They wouldn't be pickleable.
def plus_one(x):
    """Return *x* incremented by one."""
    return x + 1


def double(x):
    """Return twice *x*."""
    return 2 * x


def square(x):
    """Return *x* multiplied by itself."""
    return x * x
def main():
    """Run the three functions over 0..4 in a process pool and print sums.

    Each integer produced by the inner generator is fanned out to
    ``plus_one``, ``double`` and ``square`` through a ``FuncApplier``; the
    per-input result tuples are then summed element-wise and printed.

    Returns:
        0, intended to be used as the process exit status.
    """
    def my_generator():
        """Yield the integers 0 to 4, printing each as it is produced."""
        for i in range(5):
            print(i)
            yield i

    test_tuple_func = FuncApplier((plus_one, double, square))

    with Pool(processes=5) as pool:
        results_generator = pool.imap_unordered(
            test_tuple_func,
            (test_tuple_func.fork_args(args_list)
             for args_list in my_generator()))
        # The lazy results must be consumed while the pool is still alive,
        # i.e. before the ``with`` block is left.
        print("sum of x+1:\t%s\nsum of 2*x:\t%s\nsum of x*x:\t%s" % reduce(
            sum_tuples, results_generator))
    # Return the status instead of calling exit() here, so the caller
    # decides whether to terminate the interpreter.
    return 0


if __name__ == "__main__":
    exit(main())
一些 pickle 序列化测试
查阅资料后我了解到,有时对象需要定义 __setstate__
和 __getstate__
方法才能被 pickle 序列化。这对某些 pickle 协议有帮助,但似乎并不能解决在 cProfile
下运行时出现的问题。
请参阅下面的测试。
更新的脚本:
#!/usr/bin/env python3
"""This script tries to work around some limitations of multiprocessing."""
from itertools import repeat, starmap
from multiprocessing import Pool
from functools import reduce
from operator import add
from time import sleep
import pickle
# Doesn't work because local functions can't be pickled:
# def make_tuple_func(funcs):
# def tuple_func(args_list):
# return tuple(func(args) for func, args in zip(funcs, args_list))
# return tuple_func
#
# test_tuple_func = make_tuple_func((plus_one, double, square))
class FuncApplier(object):
    """Group functions and call them pairwise on a tuple of arguments.

    Instances are callable, so one can be used where a plain function is
    expected. Instance attributes are limited to ``funcs`` by ``__slots__``,
    and the pickled state is just that ``funcs`` value.
    """

    __slots__ = ("funcs", )

    def __init__(self, funcs):
        """Store *funcs*, a sized collection of one-argument callables."""
        self.funcs = funcs

    def __len__(self):
        """Return the number of grouped functions."""
        return len(self.funcs)

    def __call__(self, args_list):
        """Return ``(funcs[0](args_list[0]), funcs[1](args_list[1]), ...)``.

        Functions and arguments are paired with ``zip``, so any surplus on
        either side is silently ignored.
        """
        return tuple(func(args) for func, args in zip(self.funcs, args_list))

    # Attempt to make it pickleable when under cProfile (doesn't help)
    def __getstate__(self):
        """Return the state to pickle: the grouped functions."""
        return self.funcs

    def __setstate__(self, state):
        """Restore ``funcs`` from the unpickled *state*."""
        self.funcs = state

    def fork_args(self, args_list):
        """Return a tuple holding *args_list* once per grouped function."""
        return tuple(repeat(args_list, len(self)))
def sum_tuples(*tuples):
    """Return the element-wise sum of the given tuples.

    Items at the same position are combined with ``operator.add``, which
    takes exactly two operands, so this works for two tuples (the shape
    needed by ``functools.reduce``). Calling it with no tuples returns an
    empty tuple.
    """
    return tuple(starmap(add, zip(*tuples)))
# Can't define these functions in main:
# They wouldn't be pickleable.
def plus_one(x):
    """Return *x* incremented by one."""
    return x + 1


def double(x):
    """Return twice *x*."""
    return 2 * x


def square(x):
    """Return *x* multiplied by itself."""
    return x * x
def main():
    """Try pickling a ``FuncApplier`` with protocols 0-4, then run the pool.

    For each pickle protocol the serialized bytes are printed, or the
    ``PicklingError`` message if serialization fails. Afterwards each
    integer produced by the inner generator is fanned out to ``plus_one``,
    ``double`` and ``square`` in a process pool and the element-wise sums
    of the results are printed.

    Returns:
        0, intended to be used as the process exit status.
    """
    def my_generator():
        """Yield the integers 0 to 4, printing each as it is produced."""
        for i in range(5):
            print(i)
            yield i

    test_tuple_func = FuncApplier((plus_one, double, square))

    # Pass the protocol that is being announced; the hand-unrolled version
    # always pickled with protocol 0 whatever label it printed.
    for protocol in range(5):
        print("protocol %d" % protocol)
        try:
            print(pickle.dumps(test_tuple_func, protocol))
        except pickle.PicklingError as err:
            print("failed with the following error:\n%s" % err)

    with Pool(processes=5) as pool:
        results_generator = pool.imap_unordered(
            test_tuple_func,
            (test_tuple_func.fork_args(args_list)
             for args_list in my_generator()))
        # The lazy results must be consumed while the pool is still alive,
        # i.e. before the ``with`` block is left.
        print("sum of x+1:\t%s\nsum of 2*x:\t%s\nsum of x*x:\t%s" % reduce(
            sum_tuples, results_generator))
    # Return the status instead of calling exit() here, so the caller
    # decides whether to terminate the interpreter.
    return 0


if __name__ == "__main__":
    exit(main())
没有 cProfile
的测试似乎没问题:
$ ./forkiter.py
protocol 0
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 1
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 2
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 3
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 4
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
0
1
2
3
4
sum of x+1: 15
sum of 2*x: 20
sum of x*x: 30
在 cProfile 下运行时,每个 pickle 协议的测试都失败了(因此 multiprocessing 也同样失败):
$ python3.6 -m cProfile -o forkiter.prof ./forkiter.py
protocol 0
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 1
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 2
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 3
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 4
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
0
1
2
3
4
Traceback (most recent call last):
File "/home/bli/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/home/bli/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/bli/lib/python3.6/cProfile.py", line 160, in <module>
main()
File "/home/bli/lib/python3.6/cProfile.py", line 153, in main
runctx(code, globs, None, options.outfile, options.sort)
File "/home/bli/lib/python3.6/cProfile.py", line 20, in runctx
filename, sort)
File "/home/bli/lib/python3.6/profile.py", line 64, in runctx
prof.runctx(statement, globals, locals)
File "/home/bli/lib/python3.6/cProfile.py", line 100, in runctx
exec(cmd, globals, locals)
File "./forkiter.py", line 105, in <module>
exit(main())
File "./forkiter.py", line 101, in main
sum_tuples, results_generator))
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 699, in next
raise value
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/home/bli/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/home/bli/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
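似乎以 python -m cProfile 方式运行时,cProfile
无法直接用于 multiprocessing。从报错信息(attribute lookup FuncApplier on __main__ failed)来看,原因在于此时的 __main__ 模块并不是你的脚本,pickle 按 __main__.FuncApplier 查找这个类时找不到它。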
如果您乐于修改代码,只分析主进程(或为子进程添加特定的分析),cProfile.run()
似乎在一定程度上起作用。
在你的例子中(需先 import cProfile),将
exit(main())
替换为
exit(cProfile.run('main()'))
如果并行执行的函数是定义在全局作用域的函数,这样做至少是可行的;但不确定对你这种使用类的情况是否同样有效。
我编写了一个使用 multiprocessing
的 Python 小脚本(完整代码见下文)。直接运行时一切正常:
$ ./forkiter.py
0
1
2
3
4
sum of x+1: 15
sum of 2*x: 20
sum of x*x: 30
但是当我尝试使用 cProfile
对其进行分析时,我得到以下信息:
$ python3.6 -m cProfile -o forkiter.prof ./forkiter.py
0
1
2
3
4
Traceback (most recent call last):
File "/home/bli/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/home/bli/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/bli/lib/python3.6/cProfile.py", line 160, in <module>
main()
File "/home/bli/lib/python3.6/cProfile.py", line 153, in main
runctx(code, globs, None, options.outfile, options.sort)
File "/home/bli/lib/python3.6/cProfile.py", line 20, in runctx
filename, sort)
File "/home/bli/lib/python3.6/profile.py", line 64, in runctx
prof.runctx(statement, globals, locals)
File "/home/bli/lib/python3.6/cProfile.py", line 100, in runctx
exec(cmd, globals, locals)
File "./forkiter.py", line 71, in <module>
exit(main())
File "./forkiter.py", line 67, in main
sum_tuples, results_generator))
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 699, in next
raise value
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/home/bli/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/home/bli/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
这是怎么回事?
这是脚本:
#!/usr/bin/env python3
"""This script tries to work around some limitations of multiprocessing."""
from itertools import repeat, starmap
from multiprocessing import Pool
from functools import reduce
from operator import add
from time import sleep
# Doesn't work because local functions can't be pickled:
# def make_tuple_func(funcs):
# def tuple_func(args_list):
# return tuple(func(args) for func, args in zip(funcs, args_list))
# return tuple_func
#
# test_tuple_func = make_tuple_func((plus_one, double, square))
class FuncApplier(object):
    """Group functions and call them pairwise on a tuple of arguments.

    Instances are callable, so one can be used where a plain function is
    expected. Instance attributes are limited to ``funcs`` by ``__slots__``.
    """

    __slots__ = ("funcs", )

    def __init__(self, funcs):
        """Store *funcs*, a sized collection of one-argument callables."""
        self.funcs = funcs

    def __len__(self):
        """Return the number of grouped functions."""
        return len(self.funcs)

    def __call__(self, args_list):
        """Return ``(funcs[0](args_list[0]), funcs[1](args_list[1]), ...)``.

        Functions and arguments are paired with ``zip``, so any surplus on
        either side is silently ignored.
        """
        return tuple(func(args) for func, args in zip(self.funcs, args_list))

    def fork_args(self, args_list):
        """Return a tuple holding *args_list* once per grouped function."""
        return tuple(repeat(args_list, len(self)))
def sum_tuples(*tuples):
    """Return the element-wise sum of the given tuples.

    Items at the same position are combined with ``operator.add``, which
    takes exactly two operands, so this works for two tuples (the shape
    needed by ``functools.reduce``). Calling it with no tuples returns an
    empty tuple.
    """
    return tuple(starmap(add, zip(*tuples)))
# Can't define these functions in main:
# They wouldn't be pickleable.
def plus_one(x):
    """Return *x* incremented by one."""
    return x + 1


def double(x):
    """Return twice *x*."""
    return 2 * x


def square(x):
    """Return *x* multiplied by itself."""
    return x * x
def main():
    """Run the three functions over 0..4 in a process pool and print sums.

    Each integer produced by the inner generator is fanned out to
    ``plus_one``, ``double`` and ``square`` through a ``FuncApplier``; the
    per-input result tuples are then summed element-wise and printed.

    Returns:
        0, intended to be used as the process exit status.
    """
    def my_generator():
        """Yield the integers 0 to 4, printing each as it is produced."""
        for i in range(5):
            print(i)
            yield i

    test_tuple_func = FuncApplier((plus_one, double, square))

    with Pool(processes=5) as pool:
        results_generator = pool.imap_unordered(
            test_tuple_func,
            (test_tuple_func.fork_args(args_list)
             for args_list in my_generator()))
        # The lazy results must be consumed while the pool is still alive,
        # i.e. before the ``with`` block is left.
        print("sum of x+1:\t%s\nsum of 2*x:\t%s\nsum of x*x:\t%s" % reduce(
            sum_tuples, results_generator))
    # Return the status instead of calling exit() here, so the caller
    # decides whether to terminate the interpreter.
    return 0


if __name__ == "__main__":
    exit(main())
一些 pickle 序列化测试
查阅资料后我了解到,有时对象需要定义 __setstate__
和 __getstate__
方法才能被 pickle 序列化。这对某些 pickle 协议有帮助,但似乎并不能解决在 cProfile
下运行时出现的问题。
请参阅下面的测试。
更新的脚本:
#!/usr/bin/env python3
"""This script tries to work around some limitations of multiprocessing."""
from itertools import repeat, starmap
from multiprocessing import Pool
from functools import reduce
from operator import add
from time import sleep
import pickle
# Doesn't work because local functions can't be pickled:
# def make_tuple_func(funcs):
# def tuple_func(args_list):
# return tuple(func(args) for func, args in zip(funcs, args_list))
# return tuple_func
#
# test_tuple_func = make_tuple_func((plus_one, double, square))
class FuncApplier(object):
    """Group functions and call them pairwise on a tuple of arguments.

    Instances are callable, so one can be used where a plain function is
    expected. Instance attributes are limited to ``funcs`` by ``__slots__``,
    and the pickled state is just that ``funcs`` value.
    """

    __slots__ = ("funcs", )

    def __init__(self, funcs):
        """Store *funcs*, a sized collection of one-argument callables."""
        self.funcs = funcs

    def __len__(self):
        """Return the number of grouped functions."""
        return len(self.funcs)

    def __call__(self, args_list):
        """Return ``(funcs[0](args_list[0]), funcs[1](args_list[1]), ...)``.

        Functions and arguments are paired with ``zip``, so any surplus on
        either side is silently ignored.
        """
        return tuple(func(args) for func, args in zip(self.funcs, args_list))

    # Attempt to make it pickleable when under cProfile (doesn't help)
    def __getstate__(self):
        """Return the state to pickle: the grouped functions."""
        return self.funcs

    def __setstate__(self, state):
        """Restore ``funcs`` from the unpickled *state*."""
        self.funcs = state

    def fork_args(self, args_list):
        """Return a tuple holding *args_list* once per grouped function."""
        return tuple(repeat(args_list, len(self)))
def sum_tuples(*tuples):
    """Return the element-wise sum of the given tuples.

    Items at the same position are combined with ``operator.add``, which
    takes exactly two operands, so this works for two tuples (the shape
    needed by ``functools.reduce``). Calling it with no tuples returns an
    empty tuple.
    """
    return tuple(starmap(add, zip(*tuples)))
# Can't define these functions in main:
# They wouldn't be pickleable.
def plus_one(x):
    """Return *x* incremented by one."""
    return x + 1


def double(x):
    """Return twice *x*."""
    return 2 * x


def square(x):
    """Return *x* multiplied by itself."""
    return x * x
def main():
    """Try pickling a ``FuncApplier`` with protocols 0-4, then run the pool.

    For each pickle protocol the serialized bytes are printed, or the
    ``PicklingError`` message if serialization fails. Afterwards each
    integer produced by the inner generator is fanned out to ``plus_one``,
    ``double`` and ``square`` in a process pool and the element-wise sums
    of the results are printed.

    Returns:
        0, intended to be used as the process exit status.
    """
    def my_generator():
        """Yield the integers 0 to 4, printing each as it is produced."""
        for i in range(5):
            print(i)
            yield i

    test_tuple_func = FuncApplier((plus_one, double, square))

    # Pass the protocol that is being announced; the hand-unrolled version
    # always pickled with protocol 0 whatever label it printed.
    for protocol in range(5):
        print("protocol %d" % protocol)
        try:
            print(pickle.dumps(test_tuple_func, protocol))
        except pickle.PicklingError as err:
            print("failed with the following error:\n%s" % err)

    with Pool(processes=5) as pool:
        results_generator = pool.imap_unordered(
            test_tuple_func,
            (test_tuple_func.fork_args(args_list)
             for args_list in my_generator()))
        # The lazy results must be consumed while the pool is still alive,
        # i.e. before the ``with`` block is left.
        print("sum of x+1:\t%s\nsum of 2*x:\t%s\nsum of x*x:\t%s" % reduce(
            sum_tuples, results_generator))
    # Return the status instead of calling exit() here, so the caller
    # decides whether to terminate the interpreter.
    return 0


if __name__ == "__main__":
    exit(main())
没有 cProfile
的测试似乎没问题:
$ ./forkiter.py
protocol 0
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 1
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 2
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 3
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 4
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
0
1
2
3
4
sum of x+1: 15
sum of 2*x: 20
sum of x*x: 30
在 cProfile 下运行时,每个 pickle 协议的测试都失败了(因此 multiprocessing 也同样失败):
$ python3.6 -m cProfile -o forkiter.prof ./forkiter.py
protocol 0
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 1
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 2
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 3
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 4
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
0
1
2
3
4
Traceback (most recent call last):
File "/home/bli/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/home/bli/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/bli/lib/python3.6/cProfile.py", line 160, in <module>
main()
File "/home/bli/lib/python3.6/cProfile.py", line 153, in main
runctx(code, globs, None, options.outfile, options.sort)
File "/home/bli/lib/python3.6/cProfile.py", line 20, in runctx
filename, sort)
File "/home/bli/lib/python3.6/profile.py", line 64, in runctx
prof.runctx(statement, globals, locals)
File "/home/bli/lib/python3.6/cProfile.py", line 100, in runctx
exec(cmd, globals, locals)
File "./forkiter.py", line 105, in <module>
exit(main())
File "./forkiter.py", line 101, in main
sum_tuples, results_generator))
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 699, in next
raise value
File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/home/bli/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/home/bli/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
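似乎以 python -m cProfile 方式运行时,cProfile
无法直接用于 multiprocessing。从报错信息(attribute lookup FuncApplier on __main__ failed)来看,原因在于此时的 __main__ 模块并不是你的脚本,pickle 按 __main__.FuncApplier 查找这个类时找不到它。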
如果您乐于修改代码,只分析主进程(或为子进程添加特定的分析),cProfile.run()
似乎在一定程度上起作用。
在你的例子中(需先 import cProfile),将
exit(main())
替换为
exit(cProfile.run('main()'))
如果并行执行的函数是定义在全局作用域的函数,这样做至少是可行的;但不确定对你这种使用类的情况是否同样有效。