函数调用开销——为什么 Python 的内置函数看起来比我自己写的 C 扩展函数更快?
Function call overhead - why do builtin Python builtins appear to be faster than my builtins?
我一直对函数调用开销很感兴趣,所以写了一个最小的 C 扩展,导出两个函数 nop
和 starnop
,它们基本上什么都不做,只是把输入原样传回(两个相关的函数就在最上面,其余只是乏味的样板代码):
amanmodule.c:
#include <Python.h>
/*
 * nop(arg) -> arg
 *
 * Returns its single positional argument unchanged, as a new reference.
 * `args` is the positional-argument tuple (the function is registered with
 * METH_VARARGS).  Returns NULL, with the exception set by
 * PyArg_UnpackTuple, when the tuple does not hold exactly one item.
 */
static PyObject* aman_nop(PyObject *self, PyObject *args)
{
    PyObject *obj = NULL;

    /* The second argument is the function name shown in the error message,
       so it has to be "nop", not the name of the parameter. */
    if (!PyArg_UnpackTuple(args, "nop", 1, 1, &obj)) {
        return NULL;
    }
    Py_INCREF(obj);
    return obj;
}
/*
 * starnop(*args) -> args
 *
 * Hands the received argument tuple straight back to the caller without
 * looking at its contents.
 */
static PyObject* aman_starnop(PyObject *self, PyObject *args)
{
    PyObject *result = args;

    /* The caller gets its own reference to the tuple. */
    Py_INCREF(result);
    return result;
}
static PyMethodDef AmanMethods[] = {
{"nop", (PyCFunction)aman_nop, METH_VARARGS,
PyDoc_STR("nop(arg) -> arg\n\nReturn arg unchanged.")},
{"starnop", (PyCFunction)aman_starnop, METH_VARARGS,
PyDoc_STR("starnop(*args) -> args\n\nReturn tuple of args unchanged")},
{NULL, NULL}
};
static struct PyModuleDef amanmodule = {
PyModuleDef_HEAD_INIT,
"aman",
"aman - a module about nothing.\n\n"
"Provides functions 'nop' and 'starnop' which do nothing:\n"
"nop(arg) -> arg; starnop(*args) -> args\n",
-1,
AmanMethods
};
PyMODINIT_FUNC
PyInit_aman(void)
{
return PyModule_Create(&amanmodule);
}
setup.py:
from setuptools import setup, extension
# The `description` argument becomes the single-line "Summary" metadata field,
# so it must not contain newlines; the multi-line text goes to
# `long_description` instead.
setup(name='aman', version='1.0',
      ext_modules=[extension.Extension('aman', ['amanmodule.c'])],
      author='n.n.',
      description='aman - a module about nothing',
      long_description="""aman - a module about nothing
Provides functions 'nop' and 'starnop' which do nothing:
nop(arg) -> arg; starnop(*args) -> args
""",
      license='public domain',
      keywords='nop pass-through identity')
接下来,我将它们与纯粹的 Python 实现和一些几乎什么都不做的内置函数进行比较:
import numpy as np
from aman import nop, starnop
from timeit import timeit
def mnsd(x):
    """Format the mean and standard deviation of `x` as 'mean ± std µs'."""
    mean = np.mean(x)
    spread = np.std(x)
    return '{:8.6f} \u00b1 {:8.6f} \u00b5s'.format(mean, spread)
def pnp(x):
    """Pure-Python no-op: evaluates `x`, discards it and returns None."""
    x
# Namespace handed to timeit; its 'nop' entry is rebound to each callable
# under test so the timed statement text stays the same.
namespace = {}
candidates = (int, bool, (0).__add__, hash, starnop, nop, pnp, lambda x: x)
for candidate in candidates:
    namespace['nop'] = candidate
    label = '{:60s}'.format(repr(candidate))
    # Ten timeit runs per statement; mnsd reports their mean and spread.
    int_runs = [timeit('nop(1)', globals=namespace) for _ in range(10)]
    bool_runs = [timeit('nop(True)', globals=namespace) for _ in range(10)]
    print(label, mnsd(int_runs), ' ', mnsd(bool_runs))
第一个问题:我在测量方法上有没有犯什么愚蠢的错误?
以下是 10 轮测量的结果,每轮 1,000,000 次调用:
<class 'int'> 0.099754 ± 0.003917 µs 0.103933 ± 0.000585 µs
<class 'bool'> 0.097711 ± 0.000661 µs 0.094412 ± 0.000612 µs
<method-wrapper '__add__' of int object at 0x8c7000> 0.065146 ± 0.000728 µs 0.064976 ± 0.000605 µs
<built-in function hash> 0.039546 ± 0.000671 µs 0.039566 ± 0.000452 µs
<built-in function starnop> 0.056490 ± 0.000873 µs 0.056234 ± 0.000181 µs
<built-in function nop> 0.060094 ± 0.000799 µs 0.059959 ± 0.000170 µs
<function pnp at 0x7fa31c0512f0> 0.090452 ± 0.001077 µs 0.098479 ± 0.003314 µs
<function <lambda> at 0x7fa31c051378> 0.086387 ± 0.000817 µs 0.086536 ± 0.000714 µs
现在是我真正的问题:尽管我的 nop 函数是用 C 编写的,而且什么也不做(starnop
甚至不解析它的参数),内置的 hash
函数却始终更快。我知道整数在 Python 中就是它自身的哈希值,所以 hash
在这里也相当于一个 nop,但它并不比我的 nop 做得更少,那么为什么速度会有差异?
更新:完全忘了说明:我用的是一台相当标准的 x86_64 机器,Linux,gcc 4.8.5。扩展是用 python3 setup.py install --user 安装的。
Python 函数调用的开销中有很大一部分(也许是大部分?)来自创建 args
元组。参数解析也会增加一些开销。
使用 METH_VARARGS
调用约定定义的函数需要创建一个元组来存放所有参数。如果只需要一个参数,可以使用 METH_O
调用约定。使用 METH_O
时不会创建元组,而是直接传递那个单独的参数。我在您的示例中添加了一个使用 METH_O 的 nop1。
可以使用 METH_NOARGS
定义不需要参数的函数。请参阅 nop2
以获得尽可能少的开销。
当使用 METH_VARARGS
时,可以通过直接解析 args
元组而不是调用 PyArg_UnpackTuple
或相关的 PyArg_
函数来稍微减少开销。它稍微快一些。参见 nop3
。
内置 hash()
函数使用了 METH_O
调用约定。
已修改amanmodule.c
#include <Python.h>
/*
 * nop(arg) -> arg
 *
 * Returns its single positional argument unchanged, as a new reference.
 * `args` is the positional-argument tuple (the function is registered with
 * METH_VARARGS).  Returns NULL, with the exception set by
 * PyArg_UnpackTuple, when the tuple does not hold exactly one item.
 */
static PyObject* aman_nop(PyObject *self, PyObject *args)
{
    PyObject *obj = NULL;

    /* The second argument is the function name shown in the error message,
       so it has to be "nop", not the name of the parameter. */
    if (!PyArg_UnpackTuple(args, "nop", 1, 1, &obj)) {
        return NULL;
    }
    Py_INCREF(obj);
    return obj;
}
/*
 * nop1(arg) -> arg
 *
 * Registered with METH_O in the method table: the single argument arrives
 * directly in `other`, and is returned as a new reference.
 */
static PyObject* aman_nop1(PyObject *self, PyObject *other)
{
    PyObject *result = other;

    Py_INCREF(result);
    return result;
}
/*
 * nop2() -> None
 *
 * Registered with METH_NOARGS.  Such handlers are still called through the
 * two-parameter PyCFunction signature (the second argument is always NULL),
 * so the function must declare both parameters; calling a one-parameter
 * function through the cast pointer is undefined behaviour.
 */
static PyObject* aman_nop2(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    Py_RETURN_NONE;
}
/*
 * nop3(arg) -> arg
 *
 * Same contract as nop, but reads the argument tuple directly instead of
 * going through PyArg_UnpackTuple.  Raises TypeError (returns NULL) unless
 * exactly one argument was passed.
 */
static PyObject* aman_nop3(PyObject *self, PyObject *args)
{
    PyObject *item;

    /* Reject any arity other than one up front. */
    if (PyTuple_GET_SIZE(args) != 1) {
        PyErr_SetString(PyExc_TypeError, "nop3 requires 1 argument");
        return NULL;
    }

    item = PyTuple_GET_ITEM(args, 0);
    /* Take our own reference before handing the item back. */
    Py_INCREF(item);
    return item;
}
/*
 * starnop(*args) -> args
 *
 * Hands the received argument tuple straight back to the caller without
 * looking at its contents.
 */
static PyObject* aman_starnop(PyObject *self, PyObject *args)
{
    PyObject *result = args;

    /* The caller gets its own reference to the tuple. */
    Py_INCREF(result);
    return result;
}
/*
 * Method table of the aman module.  Each entry gives the Python-visible
 * name, the C function, its calling convention and its docstring; the
 * all-NULL entry terminates the table.  Every docstring names the function
 * it belongs to (nop2 takes no argument and returns None).
 */
static PyMethodDef AmanMethods[] = {
    {"nop", (PyCFunction)aman_nop, METH_VARARGS,
     PyDoc_STR("nop(arg) -> arg\n\nReturn arg unchanged.")},
    {"nop1", (PyCFunction)aman_nop1, METH_O,
     PyDoc_STR("nop1(arg) -> arg\n\nReturn arg unchanged.")},
    {"nop2", (PyCFunction)aman_nop2, METH_NOARGS,
     PyDoc_STR("nop2() -> None\n\nTake no arguments and return None.")},
    {"nop3", (PyCFunction)aman_nop3, METH_VARARGS,
     PyDoc_STR("nop3(arg) -> arg\n\nReturn arg unchanged.")},
    {"starnop", (PyCFunction)aman_starnop, METH_VARARGS,
     PyDoc_STR("starnop(*args) -> args\n\nReturn tuple of args unchanged")},
    {NULL, NULL, 0, NULL}
};
/*
 * Module definition: name, docstring, state size and method table of "aman".
 * The docstring lists every function exported through AmanMethods.
 */
static struct PyModuleDef amanmodule = {
    PyModuleDef_HEAD_INIT,
    "aman",
    "aman - a module about nothing.\n\n"
    "Provides functions which do nothing:\n"
    "nop(arg) -> arg; nop1(arg) -> arg; nop2() -> None;\n"
    "nop3(arg) -> arg; starnop(*args) -> args\n",
    -1,
    AmanMethods
};
PyMODINIT_FUNC
PyInit_aman(void)
{
return PyModule_Create(&amanmodule);
}
已修改test.py
import numpy as np
from aman import nop, nop1, nop2, nop3, starnop
from timeit import timeit
def mnsd(x):
    """Format the mean and standard deviation of `x` as 'mean ± std µs'."""
    mean = np.mean(x)
    spread = np.std(x)
    return '{:8.6f} \u00b1 {:8.6f} \u00b5s'.format(mean, spread)
def pnp(x):
    """Pure-Python no-op: evaluates `x`, discards it and returns None."""
    x
# Namespace handed to timeit; its 'nop' entry is rebound to each callable
# under test so the timed statement text stays the same.
namespace = {}

# Callables taking one argument, timed with both an int and a bool.
one_arg_candidates = (int, bool, (0).__add__, hash, starnop, nop, nop1, nop3,
                      pnp, lambda x: x)
for candidate in one_arg_candidates:
    namespace['nop'] = candidate
    label = '{:60s}'.format(repr(candidate))
    # Ten timeit runs per statement; mnsd reports their mean and spread.
    int_runs = [timeit('nop(1)', globals=namespace) for _ in range(10)]
    bool_runs = [timeit('nop(True)', globals=namespace) for _ in range(10)]
    print(label, mnsd(int_runs), ' ', mnsd(bool_runs))

# To test with no arguments
for candidate in (nop2,):
    namespace['nop'] = candidate
    label = '{:60s}'.format(repr(candidate))
    # The same zero-argument statement is measured twice to fill both columns.
    first_runs = [timeit('nop()', globals=namespace) for _ in range(10)]
    second_runs = [timeit('nop()', globals=namespace) for _ in range(10)]
    print(label, mnsd(first_runs), ' ', mnsd(second_runs))
结果
$ python3 test.py
<class 'int'> 0.080414 ± 0.004360 µs 0.086166 ± 0.003216 µs
<class 'bool'> 0.080501 ± 0.008929 µs 0.075601 ± 0.000598 µs
<method-wrapper '__add__' of int object at 0xa6dca0> 0.045652 ± 0.004229 µs 0.044146 ± 0.000114 µs
<built-in function hash> 0.035122 ± 0.003317 µs 0.033419 ± 0.000136 µs
<built-in function starnop> 0.044056 ± 0.001300 µs 0.044280 ± 0.001629 µs
<built-in function nop> 0.047297 ± 0.000777 µs 0.049536 ± 0.007577 µs
<built-in function nop1> 0.030402 ± 0.001423 µs 0.031249 ± 0.002352 µs
<built-in function nop3> 0.044673 ± 0.004041 µs 0.042936 ± 0.000177 µs
<function pnp at 0x7f946342d840> 0.071846 ± 0.005377 µs 0.071085 ± 0.003314 µs
<function <lambda> at 0x7f946342d8c8> 0.066621 ± 0.001499 µs 0.067163 ± 0.002962 µs
<built-in function nop2> 0.027736 ± 0.001487 µs 0.027035 ± 0.000397 µs
我一直对函数调用开销很感兴趣,所以写了一个最小的 C 扩展,导出两个函数 nop
和 starnop
,它们基本上什么都不做,只是把输入原样传回(两个相关的函数就在最上面,其余只是乏味的样板代码):
amanmodule.c:
#include <Python.h>
/*
 * nop(arg) -> arg
 *
 * Returns its single positional argument unchanged, as a new reference.
 * `args` is the positional-argument tuple (the function is registered with
 * METH_VARARGS).  Returns NULL, with the exception set by
 * PyArg_UnpackTuple, when the tuple does not hold exactly one item.
 */
static PyObject* aman_nop(PyObject *self, PyObject *args)
{
    PyObject *obj = NULL;

    /* The second argument is the function name shown in the error message,
       so it has to be "nop", not the name of the parameter. */
    if (!PyArg_UnpackTuple(args, "nop", 1, 1, &obj)) {
        return NULL;
    }
    Py_INCREF(obj);
    return obj;
}
/*
 * starnop(*args) -> args
 *
 * Hands the received argument tuple straight back to the caller without
 * looking at its contents.
 */
static PyObject* aman_starnop(PyObject *self, PyObject *args)
{
    PyObject *result = args;

    /* The caller gets its own reference to the tuple. */
    Py_INCREF(result);
    return result;
}
static PyMethodDef AmanMethods[] = {
{"nop", (PyCFunction)aman_nop, METH_VARARGS,
PyDoc_STR("nop(arg) -> arg\n\nReturn arg unchanged.")},
{"starnop", (PyCFunction)aman_starnop, METH_VARARGS,
PyDoc_STR("starnop(*args) -> args\n\nReturn tuple of args unchanged")},
{NULL, NULL}
};
static struct PyModuleDef amanmodule = {
PyModuleDef_HEAD_INIT,
"aman",
"aman - a module about nothing.\n\n"
"Provides functions 'nop' and 'starnop' which do nothing:\n"
"nop(arg) -> arg; starnop(*args) -> args\n",
-1,
AmanMethods
};
PyMODINIT_FUNC
PyInit_aman(void)
{
return PyModule_Create(&amanmodule);
}
setup.py:
from setuptools import setup, extension
# The `description` argument becomes the single-line "Summary" metadata field,
# so it must not contain newlines; the multi-line text goes to
# `long_description` instead.
setup(name='aman', version='1.0',
      ext_modules=[extension.Extension('aman', ['amanmodule.c'])],
      author='n.n.',
      description='aman - a module about nothing',
      long_description="""aman - a module about nothing
Provides functions 'nop' and 'starnop' which do nothing:
nop(arg) -> arg; starnop(*args) -> args
""",
      license='public domain',
      keywords='nop pass-through identity')
接下来,我将它们与纯粹的 Python 实现和一些几乎什么都不做的内置函数进行比较:
import numpy as np
from aman import nop, starnop
from timeit import timeit
def mnsd(x):
    """Format the mean and standard deviation of `x` as 'mean ± std µs'."""
    mean = np.mean(x)
    spread = np.std(x)
    return '{:8.6f} \u00b1 {:8.6f} \u00b5s'.format(mean, spread)
def pnp(x):
    """Pure-Python no-op: evaluates `x`, discards it and returns None."""
    x
# Namespace handed to timeit; its 'nop' entry is rebound to each callable
# under test so the timed statement text stays the same.
namespace = {}
candidates = (int, bool, (0).__add__, hash, starnop, nop, pnp, lambda x: x)
for candidate in candidates:
    namespace['nop'] = candidate
    label = '{:60s}'.format(repr(candidate))
    # Ten timeit runs per statement; mnsd reports their mean and spread.
    int_runs = [timeit('nop(1)', globals=namespace) for _ in range(10)]
    bool_runs = [timeit('nop(True)', globals=namespace) for _ in range(10)]
    print(label, mnsd(int_runs), ' ', mnsd(bool_runs))
第一个问题:我在测量方法上有没有犯什么愚蠢的错误?
以下是 10 轮测量的结果,每轮 1,000,000 次调用:
<class 'int'> 0.099754 ± 0.003917 µs 0.103933 ± 0.000585 µs
<class 'bool'> 0.097711 ± 0.000661 µs 0.094412 ± 0.000612 µs
<method-wrapper '__add__' of int object at 0x8c7000> 0.065146 ± 0.000728 µs 0.064976 ± 0.000605 µs
<built-in function hash> 0.039546 ± 0.000671 µs 0.039566 ± 0.000452 µs
<built-in function starnop> 0.056490 ± 0.000873 µs 0.056234 ± 0.000181 µs
<built-in function nop> 0.060094 ± 0.000799 µs 0.059959 ± 0.000170 µs
<function pnp at 0x7fa31c0512f0> 0.090452 ± 0.001077 µs 0.098479 ± 0.003314 µs
<function <lambda> at 0x7fa31c051378> 0.086387 ± 0.000817 µs 0.086536 ± 0.000714 µs
现在是我真正的问题:尽管我的 nop 函数是用 C 编写的,而且什么也不做(starnop
甚至不解析它的参数),内置的 hash
函数却始终更快。我知道整数在 Python 中就是它自身的哈希值,所以 hash
在这里也相当于一个 nop,但它并不比我的 nop 做得更少,那么为什么速度会有差异?
更新:完全忘了说明:我用的是一台相当标准的 x86_64 机器,Linux,gcc 4.8.5。扩展是用 python3 setup.py install --user 安装的。
Python 函数调用的开销中有很大一部分(也许是大部分?)来自创建 args
元组。参数解析也会增加一些开销。
使用 METH_VARARGS
调用约定定义的函数需要创建一个元组来存放所有参数。如果只需要一个参数,可以使用 METH_O
调用约定。使用 METH_O
时不会创建元组,而是直接传递那个单独的参数。我在您的示例中添加了一个使用 METH_O 的 nop1。
可以使用 METH_NOARGS
定义不需要参数的函数。请参阅 nop2
以获得尽可能少的开销。
当使用 METH_VARARGS
时,可以通过直接解析 args
元组而不是调用 PyArg_UnpackTuple
或相关的 PyArg_
函数来稍微减少开销。它稍微快一些。参见 nop3
。
内置 hash()
函数使用了 METH_O
调用约定。
已修改amanmodule.c
#include <Python.h>
/*
 * nop(arg) -> arg
 *
 * Returns its single positional argument unchanged, as a new reference.
 * `args` is the positional-argument tuple (the function is registered with
 * METH_VARARGS).  Returns NULL, with the exception set by
 * PyArg_UnpackTuple, when the tuple does not hold exactly one item.
 */
static PyObject* aman_nop(PyObject *self, PyObject *args)
{
    PyObject *obj = NULL;

    /* The second argument is the function name shown in the error message,
       so it has to be "nop", not the name of the parameter. */
    if (!PyArg_UnpackTuple(args, "nop", 1, 1, &obj)) {
        return NULL;
    }
    Py_INCREF(obj);
    return obj;
}
/*
 * nop1(arg) -> arg
 *
 * Registered with METH_O in the method table: the single argument arrives
 * directly in `other`, and is returned as a new reference.
 */
static PyObject* aman_nop1(PyObject *self, PyObject *other)
{
    PyObject *result = other;

    Py_INCREF(result);
    return result;
}
/*
 * nop2() -> None
 *
 * Registered with METH_NOARGS.  Such handlers are still called through the
 * two-parameter PyCFunction signature (the second argument is always NULL),
 * so the function must declare both parameters; calling a one-parameter
 * function through the cast pointer is undefined behaviour.
 */
static PyObject* aman_nop2(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    Py_RETURN_NONE;
}
/*
 * nop3(arg) -> arg
 *
 * Same contract as nop, but reads the argument tuple directly instead of
 * going through PyArg_UnpackTuple.  Raises TypeError (returns NULL) unless
 * exactly one argument was passed.
 */
static PyObject* aman_nop3(PyObject *self, PyObject *args)
{
    PyObject *item;

    /* Reject any arity other than one up front. */
    if (PyTuple_GET_SIZE(args) != 1) {
        PyErr_SetString(PyExc_TypeError, "nop3 requires 1 argument");
        return NULL;
    }

    item = PyTuple_GET_ITEM(args, 0);
    /* Take our own reference before handing the item back. */
    Py_INCREF(item);
    return item;
}
/*
 * starnop(*args) -> args
 *
 * Hands the received argument tuple straight back to the caller without
 * looking at its contents.
 */
static PyObject* aman_starnop(PyObject *self, PyObject *args)
{
    PyObject *result = args;

    /* The caller gets its own reference to the tuple. */
    Py_INCREF(result);
    return result;
}
/*
 * Method table of the aman module.  Each entry gives the Python-visible
 * name, the C function, its calling convention and its docstring; the
 * all-NULL entry terminates the table.  Every docstring names the function
 * it belongs to (nop2 takes no argument and returns None).
 */
static PyMethodDef AmanMethods[] = {
    {"nop", (PyCFunction)aman_nop, METH_VARARGS,
     PyDoc_STR("nop(arg) -> arg\n\nReturn arg unchanged.")},
    {"nop1", (PyCFunction)aman_nop1, METH_O,
     PyDoc_STR("nop1(arg) -> arg\n\nReturn arg unchanged.")},
    {"nop2", (PyCFunction)aman_nop2, METH_NOARGS,
     PyDoc_STR("nop2() -> None\n\nTake no arguments and return None.")},
    {"nop3", (PyCFunction)aman_nop3, METH_VARARGS,
     PyDoc_STR("nop3(arg) -> arg\n\nReturn arg unchanged.")},
    {"starnop", (PyCFunction)aman_starnop, METH_VARARGS,
     PyDoc_STR("starnop(*args) -> args\n\nReturn tuple of args unchanged")},
    {NULL, NULL, 0, NULL}
};
/*
 * Module definition: name, docstring, state size and method table of "aman".
 * The docstring lists every function exported through AmanMethods.
 */
static struct PyModuleDef amanmodule = {
    PyModuleDef_HEAD_INIT,
    "aman",
    "aman - a module about nothing.\n\n"
    "Provides functions which do nothing:\n"
    "nop(arg) -> arg; nop1(arg) -> arg; nop2() -> None;\n"
    "nop3(arg) -> arg; starnop(*args) -> args\n",
    -1,
    AmanMethods
};
PyMODINIT_FUNC
PyInit_aman(void)
{
return PyModule_Create(&amanmodule);
}
已修改test.py
import numpy as np
from aman import nop, nop1, nop2, nop3, starnop
from timeit import timeit
def mnsd(x):
    """Format the mean and standard deviation of `x` as 'mean ± std µs'."""
    mean = np.mean(x)
    spread = np.std(x)
    return '{:8.6f} \u00b1 {:8.6f} \u00b5s'.format(mean, spread)
def pnp(x):
    """Pure-Python no-op: evaluates `x`, discards it and returns None."""
    x
# Namespace handed to timeit; its 'nop' entry is rebound to each callable
# under test so the timed statement text stays the same.
namespace = {}

# Callables taking one argument, timed with both an int and a bool.
one_arg_candidates = (int, bool, (0).__add__, hash, starnop, nop, nop1, nop3,
                      pnp, lambda x: x)
for candidate in one_arg_candidates:
    namespace['nop'] = candidate
    label = '{:60s}'.format(repr(candidate))
    # Ten timeit runs per statement; mnsd reports their mean and spread.
    int_runs = [timeit('nop(1)', globals=namespace) for _ in range(10)]
    bool_runs = [timeit('nop(True)', globals=namespace) for _ in range(10)]
    print(label, mnsd(int_runs), ' ', mnsd(bool_runs))

# To test with no arguments
for candidate in (nop2,):
    namespace['nop'] = candidate
    label = '{:60s}'.format(repr(candidate))
    # The same zero-argument statement is measured twice to fill both columns.
    first_runs = [timeit('nop()', globals=namespace) for _ in range(10)]
    second_runs = [timeit('nop()', globals=namespace) for _ in range(10)]
    print(label, mnsd(first_runs), ' ', mnsd(second_runs))
结果
$ python3 test.py
<class 'int'> 0.080414 ± 0.004360 µs 0.086166 ± 0.003216 µs
<class 'bool'> 0.080501 ± 0.008929 µs 0.075601 ± 0.000598 µs
<method-wrapper '__add__' of int object at 0xa6dca0> 0.045652 ± 0.004229 µs 0.044146 ± 0.000114 µs
<built-in function hash> 0.035122 ± 0.003317 µs 0.033419 ± 0.000136 µs
<built-in function starnop> 0.044056 ± 0.001300 µs 0.044280 ± 0.001629 µs
<built-in function nop> 0.047297 ± 0.000777 µs 0.049536 ± 0.007577 µs
<built-in function nop1> 0.030402 ± 0.001423 µs 0.031249 ± 0.002352 µs
<built-in function nop3> 0.044673 ± 0.004041 µs 0.042936 ± 0.000177 µs
<function pnp at 0x7f946342d840> 0.071846 ± 0.005377 µs 0.071085 ± 0.003314 µs
<function <lambda> at 0x7f946342d8c8> 0.066621 ± 0.001499 µs 0.067163 ± 0.002962 µs
<built-in function nop2> 0.027736 ± 0.001487 µs 0.027035 ± 0.000397 µs