执行 Numba 生成的汇编代码
Executing the assembly generated by Numba
由于一连串奇怪的原因,我陷入了以下困境:我使用以下 Python 代码将 Numba 生成的汇编代码写入文件:
@jit(nopython=True, nogil=True)
def six():
    """Return the constant 6 (minimal function used to obtain JIT output)."""
    return 6


# Dump the assembly of every compiled signature of `six` into a single file.
# Only the assembly text is needed, so iterate over the values.
with open("six.asm", "w") as f:
    for v in six.inspect_asm().values():
        f.write(v)
汇编代码已成功写入文件,但我不知道如何执行它。我尝试了以下方法:
$ as -o six.o six.asm
$ ld six.o -o six.bin
$ chmod +x six.bin
$ ./six.bin
但是,链接步骤失败并显示以下内容:
ld: warning: cannot find entry symbol _start; defaulting to 00000000004000f0
six.o: In function `cpython::__main__::six1':
<string>:(.text+0x20): undefined reference to `PyArg_UnpackTuple'
<string>:(.text+0x47): undefined reference to `PyEval_SaveThread'
<string>:(.text+0x53): undefined reference to `PyEval_RestoreThread'
<string>:(.text+0x62): undefined reference to `PyLong_FromLongLong'
<string>:(.text+0x74): undefined reference to `PyExc_RuntimeError'
<string>:(.text+0x88): undefined reference to `PyErr_SetString'
我怀疑需要将 Numba 和/或 Python 标准库与生成的目标文件动态链接才能成功运行,但我不确定该如何做到(甚至不确定这是否可行)。
我还尝试了以下方法,将中间 LLVM 代码(而不是汇编代码)写入文件:
# Dump the LLVM IR of every compiled signature of `six` into a single file.
# Only the IR text is needed, so iterate over the values.
with open("six.ll", "w") as f:
    for v in six.inspect_llvm().values():
        f.write(v)
然后
$ lli six.ll
但这也失败并出现以下错误:
'main' function not found in module.
更新:
事实证明,有一个实用程序可以查出需要传递给 ld 命令的相关标志,以便动态链接 Python 标准库。
$ python3-config --ldflags
Returns
-L/Users/rayan/anaconda3/lib/python3.7/config-3.7m-darwin -lpython3.7m -ldl -framework CoreFoundation
再次运行以下命令,这次使用了正确的标志:
$ as -o six.o six.asm
$ ld six.o -o six.bin -L/Users/rayan/anaconda3/lib/python3.7/config-3.7m-darwin -lpython3.7m -ldl -framework CoreFoundation
$ chmod +x six.bin
$ ./six.bin
我现在得到
ld: warning: No version-min specified on command line
ld: entry point (_main) undefined. for inferred architecture x86_64
我曾尝试在汇编文件中添加一个 _main 标签,但这似乎没有任何作用。关于如何定义入口点,有什么想法吗?
更新 2:
以防万一有用,这里是汇编代码;目标函数似乎是带有标签 _ZN8__main__7six1E 的函数:
.text
.file "<string>"
.globl _ZN8__main__7six1E
.p2align 4, 0x90
.type _ZN8__main__7six1E,@function
_ZN8__main__7six1E:
    # Store the constant 6 through the pointer passed in %rdi;
    # %eax is zeroed before returning.
    movq $6, (%rdi)
    xorl %eax, %eax
    retq
.Lfunc_end0:
.size _ZN8__main__7six1E, .Lfunc_end0-_ZN8__main__7six1E
.globl _ZN7cpython8__main__7six1E
.p2align 4, 0x90
.type _ZN7cpython8__main__7six1E,@function
_ZN7cpython8__main__7six1E:
    .cfi_startproc
    pushq %rax
    .cfi_def_cfa_offset 16
    # Call PyArg_UnpackTuple(args, "six", 0, 0); a zero result jumps to .LBB1_3.
    movq %rsi, %rdi
    movabsq $.const.six, %rsi
    movabsq $PyArg_UnpackTuple, %r8
    xorl %edx, %edx
    xorl %ecx, %ecx
    xorl %eax, %eax
    callq *%r8
    testl %eax, %eax
    je .LBB1_3
    # If the NumbaEnv slot holds 0, report "missing Environment" (.LBB1_2).
    movabsq $_ZN08NumbaEnv8__main__7six1E, %rax
    cmpq $0, (%rax)
    je .LBB1_2
    # PyEval_SaveThread() followed immediately by PyEval_RestoreThread(state).
    movabsq $PyEval_SaveThread, %rax
    callq *%rax
    movabsq $PyEval_RestoreThread, %rcx
    movq %rax, %rdi
    callq *%rcx
    # Tail-call PyLong_FromLongLong(6) to produce the Python result.
    movabsq $PyLong_FromLongLong, %rax
    movl $6, %edi
    popq %rcx
    .cfi_def_cfa_offset 8
    jmpq *%rax
.LBB1_2:
    .cfi_def_cfa_offset 16
    # PyErr_SetString(PyExc_RuntimeError, "missing Environment")
    movabsq $PyExc_RuntimeError, %rdi
    movabsq $".const.missing Environment", %rsi
    movabsq $PyErr_SetString, %rax
    callq *%rax
.LBB1_3:
    # Failure path: return 0 in %rax.
    xorl %eax, %eax
    popq %rcx
    .cfi_def_cfa_offset 8
    retq
.Lfunc_end1:
.size _ZN7cpython8__main__7six1E, .Lfunc_end1-_ZN7cpython8__main__7six1E
.cfi_endproc
.globl cfunc._ZN8__main__7six1E
.p2align 4, 0x90
.type cfunc._ZN8__main__7six1E,@function
cfunc._ZN8__main__7six1E:
    # Return the constant 6 directly in %eax.
    movl $6, %eax
    retq
.Lfunc_end2:
.size cfunc._ZN8__main__7six1E, .Lfunc_end2-cfunc._ZN8__main__7six1E
.type _ZN08NumbaEnv8__main__7six1E,@object
.comm _ZN08NumbaEnv8__main__7six1E,8,8
.type .const.six,@object
.section .rodata,"a",@progbits
.const.six:
.asciz "six"
.size .const.six, 4
.type ".const.missing Environment",@object
.p2align 4
.const.missing Environment:
.asciz "missing Environment"
.size ".const.missing Environment", 20
.section ".note.GNU-stack","",@progbits
在浏览了 [PyData.Numba]: Numba docs 并经过一些调试和反复试验之后,我得出一个结论:看来你的做法偏离了正轨(正如评论中也指出的那样)。
Numba 将 Python 代码(函数)转换为机器代码(原因很明显:速度)。它即时(在运行过程中)完成所有操作(转换、构建、插入到当前进程),程序员只需用例如 @numba.jit 来装饰函数([PyData.Numba]: Just-in-Time compilation)。
您遇到的行为是正确的。Dispatcher 对象(通过装饰 six 函数得到)只为函数本身生成(汇编)代码(其中没有 main,因为代码是在当前进程中执行的,而该进程的 main 是 Python 解释器的 main 函数)。因此,链接器抱怨没有 main 符号是正常的。这就像写一个只包含以下内容的 C 文件:
int six() {
return 6;
}
为了让事情正常工作,您必须:
1. 将 .asm 文件构建成 .o(目标)文件(已完成)
2. 将 #1. 中的 .o 文件放入一个库中,该库可以是:
   - 静态库
   - 动态库
   该库将被链接到(最终的)可执行文件中。此步骤是可选的,因为您可以直接使用 .o 文件
3. 将另一个定义了 main(并调用 six,我认为这就是全部目的)的文件构建成 .o 文件。由于我不太熟悉汇编,所以我用 C 编写了它
4. 将这 2 个实体(来自 #2.(或 #1.)和 #3.)链接在一起
作为替代方案,您可以查看 [PyData.Numba]: Compiling code ahead of time,但请记住,它会生成一个 Python(扩展)模块。
回到当前的问题。我在 Ubuntu 18.04 64bit 上进行了测试。
code00.py:
#!/usr/bin/env python
import sys
import math
import numba
@numba.jit(nopython=True, nogil=True)
def six():
    """Return the constant 6; the function whose generated assembly is dumped."""
    return 6


def main(*argv):
    """Compile ``six`` and write its assembly to a platform-tagged ``.asm`` file.

    The output file name is built from the function name, ``sys.platform``,
    the bit width derived from ``sys.maxsize`` and the first three fields of
    ``sys.version_info``.  ``argv`` is accepted but not used.
    """
    six()  # Call the function(s), otherwise `inspect_asm()` would return empty dict

    speed_funcs = [
        (six, numba.int32()),
    ]
    for func, _ in speed_funcs:
        file_name_asm = "numba_{0:s}_{1:s}_{2:03d}_{3:02d}{4:02d}{5:02d}.asm".format(
            func.__name__,
            sys.platform,
            int(round(math.log2(sys.maxsize))) + 1,
            *sys.version_info[:3]
        )
        asm = func.inspect_asm()
        print("Writing to {0:s}:".format(file_name_asm))
        # One entry per compiled signature; all of them go into the same file.
        with open(file_name_asm, "wb") as fout:
            for k, v in asm.items():
                print(" {0:}".format(k))
                fout.write(v.encode())


if __name__ == "__main__":
    print("Python {0:s} {1:d}bit on {2:s}\n".format(" ".join(item.strip() for item in sys.version.split("\n")), 64 if sys.maxsize > 0x100000000 else 32, sys.platform))
    main(*sys.argv[1:])
    print("\nDone.")
main00.c:
#include <stdio.h>
#include <dlfcn.h>
//#define SYMBOL_SIX "_ZN8__main__7six1E"
#define SYMBOL_SIX "cfunc._ZN8__main__7six1E"
typedef int (*SixFuncPtr)();
// Loads ./libnumba_six_linux.so at run time, looks up SYMBOL_SIX in it and
// prints the value returned by calling that function.
// Returns 0 on success, -1 if the library cannot be loaded, -2 if the symbol
// cannot be found.
int main() {
void *pMod = dlopen("./libnumba_six_linux.so", RTLD_LAZY);
if (!pMod) {
printf("Error (%s) loading module\n", dlerror());
return -1;
}
// The symbol is resolved by its string name via dlsym.
SixFuncPtr pSixFunc = dlsym(pMod, SYMBOL_SIX);
if (!pSixFunc)
{
printf("Error (%s) loading function\n", dlerror());
// Release the library handle before bailing out.
dlclose(pMod);
return -2;
}
printf("six() returned: %d\n", (*pSixFunc)());
dlclose(pMod);
return 0;
}
build.sh:
# Build script: generate the .asm with code00.py, assemble it, link it into a
# shared library, dump the library's symbols and build the main00 executable.
CC=gcc
LIB_BASE_NAME=numba_six_linux
# NOTE(review): FLAG_LD_LIB_NUMBALINUX is defined but not referenced below.
FLAG_LD_LIB_NUMBALINUX="-Wl,-L. -Wl,-l${LIB_BASE_NAME}"
FLAG_LD_LIB_PYTHON="-Wl,-L/usr/lib/python3.7/config-3.7m-x86_64-linux-gnu -Wl,-lpython3.7m"
# Start from a clean state: remove artifacts of previous runs.
rm -f *.asm *.o *.a *.so *.exe
echo Generate .asm
python3 code00.py
echo Assemble
# The _064_030705 suffix is hard-coded here; it must match the file name that
# code00.py writes.
as -o ${LIB_BASE_NAME}.o ${LIB_BASE_NAME}_064_030705.asm
echo Link library
LIB_NUMBA="./lib${LIB_BASE_NAME}.so"
#ar -scr ${LIB_NUMBA} ${LIB_BASE_NAME}.o
${CC} -o ${LIB_NUMBA} -shared ${LIB_BASE_NAME}.o ${FLAG_LD_LIB_PYTHON}
echo Dump library contents
nm -S ${LIB_NUMBA}
#objdump -t ${LIB_NUMBA}
echo Compile and link executable
# main00.c is linked only against libdl; the .so is loaded by path at run time.
${CC} -o main00.exe main00.c -ldl
echo Exit script
输出:
(py_venv_pc064_03.07.05_test0) [cfati@cfati-ubtu-18-064-00:~/Work/Dev/Whosebug/q061678226]> ~/sopr.sh
*** Set shorter prompt to better fit when pasted in Whosebug (or other) pages ***
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py main00.c
[064bit prompt]>
[064bit prompt]> ./build.sh
Generate .asm
Python 3.7.5 (default, Nov 7 2019, 10:50:52) [GCC 8.3.0] 64bit on linux
Writing to numba_six_linux_064_030705.asm:
()
Done.
Assemble
Link library
Dump library contents
0000000000201020 B __bss_start
00000000000008b0 0000000000000006 T cfunc._ZN8__main__7six1E
0000000000201020 0000000000000001 b completed.7698
00000000000008e0 0000000000000014 r .const.missing Environment
00000000000008d0 0000000000000004 r .const.six
w __cxa_finalize
0000000000000730 t deregister_tm_clones
00000000000007c0 t __do_global_dtors_aux
0000000000200e58 t __do_global_dtors_aux_fini_array_entry
0000000000201018 d __dso_handle
0000000000200e60 d _DYNAMIC
0000000000201020 D _edata
0000000000201030 B _end
00000000000008b8 T _fini
0000000000000800 t frame_dummy
0000000000200e50 t __frame_dummy_init_array_entry
0000000000000990 r __FRAME_END__
0000000000201000 d _GLOBAL_OFFSET_TABLE_
w __gmon_start__
00000000000008f4 r __GNU_EH_FRAME_HDR
00000000000006f0 T _init
w _ITM_deregisterTMCloneTable
w _ITM_registerTMCloneTable
U PyArg_UnpackTuple
U PyErr_SetString
U PyEval_RestoreThread
U PyEval_SaveThread
U PyExc_RuntimeError
U PyLong_FromLongLong
0000000000000770 t register_tm_clones
0000000000201020 d __TMC_END__
0000000000201028 0000000000000008 B _ZN08NumbaEnv8__main__7six1E
0000000000000820 0000000000000086 T _ZN7cpython8__main__7six1E
0000000000000810 000000000000000a T _ZN8__main__7six1E
Compile and link executable
Exit script
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py libnumba_six_linux.so main00.c main00.exe numba_six_linux_064_030705.asm numba_six_linux.o
[064bit prompt]>
[064bit prompt]> # Run the executable
[064bit prompt]>
[064bit prompt]> ./main00.exe
six() returned: 6
[064bit prompt]>
同时发布(因为它很重要)numba_six_linux_064_030705.asm:
.text
.file "<string>"
.globl _ZN8__main__7six1E
.p2align 4, 0x90
.type _ZN8__main__7six1E,@function
_ZN8__main__7six1E:
    # Store the constant 6 through the pointer passed in %rdi;
    # %eax is zeroed before returning.
    movq $6, (%rdi)
    xorl %eax, %eax
    retq
.Lfunc_end0:
.size _ZN8__main__7six1E, .Lfunc_end0-_ZN8__main__7six1E
.globl _ZN7cpython8__main__7six1E
.p2align 4, 0x90
.type _ZN7cpython8__main__7six1E,@function
_ZN7cpython8__main__7six1E:
    .cfi_startproc
    pushq %rax
    .cfi_def_cfa_offset 16
    # Call PyArg_UnpackTuple(args, "six", 0, 0); a zero result jumps to .LBB1_3.
    movq %rsi, %rdi
    movabsq $.const.six, %rsi
    movabsq $PyArg_UnpackTuple, %r8
    xorl %edx, %edx
    xorl %ecx, %ecx
    xorl %eax, %eax
    callq *%r8
    testl %eax, %eax
    je .LBB1_3
    # If the NumbaEnv slot holds 0, report "missing Environment" (.LBB1_2).
    movabsq $_ZN08NumbaEnv8__main__7six1E, %rax
    cmpq $0, (%rax)
    je .LBB1_2
    # PyEval_SaveThread() followed immediately by PyEval_RestoreThread(state).
    movabsq $PyEval_SaveThread, %rax
    callq *%rax
    movabsq $PyEval_RestoreThread, %rcx
    movq %rax, %rdi
    callq *%rcx
    # Tail-call PyLong_FromLongLong(6) to produce the Python result.
    movabsq $PyLong_FromLongLong, %rax
    movl $6, %edi
    popq %rcx
    .cfi_def_cfa_offset 8
    jmpq *%rax
.LBB1_2:
    .cfi_def_cfa_offset 16
    # PyErr_SetString(PyExc_RuntimeError, "missing Environment")
    movabsq $PyExc_RuntimeError, %rdi
    movabsq $".const.missing Environment", %rsi
    movabsq $PyErr_SetString, %rax
    callq *%rax
.LBB1_3:
    # Failure path: return 0 in %rax.
    xorl %eax, %eax
    popq %rcx
    .cfi_def_cfa_offset 8
    retq
.Lfunc_end1:
.size _ZN7cpython8__main__7six1E, .Lfunc_end1-_ZN7cpython8__main__7six1E
.cfi_endproc
.globl cfunc._ZN8__main__7six1E
.p2align 4, 0x90
.type cfunc._ZN8__main__7six1E,@function
cfunc._ZN8__main__7six1E:
    # Return the constant 6 directly in %eax.
    movl $6, %eax
    retq
.Lfunc_end2:
.size cfunc._ZN8__main__7six1E, .Lfunc_end2-cfunc._ZN8__main__7six1E
.type _ZN08NumbaEnv8__main__7six1E,@object
.comm _ZN08NumbaEnv8__main__7six1E,8,8
.type .const.six,@object
.section .rodata,"a",@progbits
.const.six:
.asciz "six"
.size .const.six, 4
.type ".const.missing Environment",@object
.p2align 4
".const.missing Environment":
.asciz "missing Environment"
.size ".const.missing Environment", 20
.section ".note.GNU-stack","",@progbits
备注:
numba_six_linux_064_030705.asm(以及派生自它的所有内容)包含 six 函数的代码。实际上,其中有一堆符号(在 OSX 上,也可以使用原生的 otool -T 查看),例如:
1. cfunc._ZN8__main__7six$241E - (C) 函数本身
2. _ZN7cpython8__main__7six$241E - Python 包装器:
   1. 执行 C <=> Python 转换(通过类似 PyArg_UnpackTuple 的 Python API 函数)
   2. 由于 #1.,它需要(依赖于)libpython3.7m
   3. 因此,nopython=True 在这种情况下没有效果
此外,这些符号中的 main 部分并不是指可执行文件的入口点(main 函数),而是指 Python 模块的顶级命名空间 (__main__)。毕竟,这段代码本来就是要从 Python 中运行的。
由于该 C 普通函数的名称中包含一个点 (.),我不能直接从 C 调用它(因为它不是有效的标识符名称),所以我不得不手动加载(.so 和)该函数(dlopen / dlsym),这导致代码比简单地调用函数要多。
我没试过,但我认为对生成的 .asm 文件进行以下(手动)更改会简化工作:
- 在汇编之前,在 .asm 文件中重命名该 C 普通函数(改成类似 __six 的名称,或任何其他不与别的(显式或内部)名称冲突的有效 C 标识符),这样就可以直接从 C 调用该函数
- 删除 Python 包装器 (#2.) 也会同时消除 #2.2.(对 libpython3.7m 的依赖)
更新#0
感谢@PeterCordes,他分享了我遗漏的确切信息 ([GNU.GCC]: Controlling Names Used in Assembler Code),这里有一个更简单的版本。
main01.c:
#include <stdio.h>
extern int six() asm ("cfunc._ZN8__main__7six1E");
// Calls six() (bound by the asm label on its extern declaration) and prints
// the returned value.
int main() {
printf("six() returned: %d\n", six());
}
输出:
[064bit prompt]> # Resume from previous point + main01.c
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py libnumba_six_linux.so main00.c main00.exe main01.c numba_six_linux_064_030705.asm numba_six_linux.o
[064bit prompt]>
[064bit prompt]> ar -scr libnumba_six_linux.a numba_six_linux.o
[064bit prompt]>
[064bit prompt]> gcc -o main01.exe main01.c ./libnumba_six_linux.a -Wl,-L/usr/lib/python3.7/config-3.7m-x86_64-linux-gnu -Wl,-lpython3.7m
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py libnumba_six_linux.a libnumba_six_linux.so main00.c main00.exe main01.c main01.exe numba_six_linux_064_030705.asm numba_six_linux.o
[064bit prompt]>
[064bit prompt]> ./main01.exe
six() returned: 6
[064bit prompt]>
由于一连串奇怪的原因,我陷入了以下困境:我使用以下 Python 代码将 Numba 生成的汇编代码写入文件:
@jit(nopython=True, nogil=True)
def six():
    """Return the constant 6 (minimal function used to obtain JIT output)."""
    return 6


# Dump the assembly of every compiled signature of `six` into a single file.
# Only the assembly text is needed, so iterate over the values.
with open("six.asm", "w") as f:
    for v in six.inspect_asm().values():
        f.write(v)
汇编代码已成功写入文件,但我不知道如何执行它。我尝试了以下方法:
$ as -o six.o six.asm
$ ld six.o -o six.bin
$ chmod +x six.bin
$ ./six.bin
但是,链接步骤失败并显示以下内容:
ld: warning: cannot find entry symbol _start; defaulting to 00000000004000f0
six.o: In function `cpython::__main__::six1':
<string>:(.text+0x20): undefined reference to `PyArg_UnpackTuple'
<string>:(.text+0x47): undefined reference to `PyEval_SaveThread'
<string>:(.text+0x53): undefined reference to `PyEval_RestoreThread'
<string>:(.text+0x62): undefined reference to `PyLong_FromLongLong'
<string>:(.text+0x74): undefined reference to `PyExc_RuntimeError'
<string>:(.text+0x88): undefined reference to `PyErr_SetString'
我怀疑需要将 Numba 和/或 Python 标准库与生成的目标文件动态链接才能成功运行,但我不确定该如何做到(甚至不确定这是否可行)。
我还尝试了以下方法,将中间 LLVM 代码(而不是汇编代码)写入文件:
# Dump the LLVM IR of every compiled signature of `six` into a single file.
# Only the IR text is needed, so iterate over the values.
with open("six.ll", "w") as f:
    for v in six.inspect_llvm().values():
        f.write(v)
然后
$ lli six.ll
但这也失败并出现以下错误:
'main' function not found in module.
更新:
事实证明,有一个实用程序可以查出需要传递给 ld 命令的相关标志,以便动态链接 Python 标准库。
$ python3-config --ldflags
Returns
-L/Users/rayan/anaconda3/lib/python3.7/config-3.7m-darwin -lpython3.7m -ldl -framework CoreFoundation
再次运行以下命令,这次使用了正确的标志:
$ as -o six.o six.asm
$ ld six.o -o six.bin -L/Users/rayan/anaconda3/lib/python3.7/config-3.7m-darwin -lpython3.7m -ldl -framework CoreFoundation
$ chmod +x six.bin
$ ./six.bin
我现在得到
ld: warning: No version-min specified on command line
ld: entry point (_main) undefined. for inferred architecture x86_64
我曾尝试在汇编文件中添加一个 _main 标签,但这似乎没有任何作用。关于如何定义入口点,有什么想法吗?
更新 2:
以防万一有用,这里是汇编代码;目标函数似乎是带有标签 _ZN8__main__7six1E 的函数:
.text
.file "<string>"
.globl _ZN8__main__7six1E
.p2align 4, 0x90
.type _ZN8__main__7six1E,@function
_ZN8__main__7six1E:
    # Store the constant 6 through the pointer passed in %rdi;
    # %eax is zeroed before returning.
    movq $6, (%rdi)
    xorl %eax, %eax
    retq
.Lfunc_end0:
.size _ZN8__main__7six1E, .Lfunc_end0-_ZN8__main__7six1E
.globl _ZN7cpython8__main__7six1E
.p2align 4, 0x90
.type _ZN7cpython8__main__7six1E,@function
_ZN7cpython8__main__7six1E:
    .cfi_startproc
    pushq %rax
    .cfi_def_cfa_offset 16
    # Call PyArg_UnpackTuple(args, "six", 0, 0); a zero result jumps to .LBB1_3.
    movq %rsi, %rdi
    movabsq $.const.six, %rsi
    movabsq $PyArg_UnpackTuple, %r8
    xorl %edx, %edx
    xorl %ecx, %ecx
    xorl %eax, %eax
    callq *%r8
    testl %eax, %eax
    je .LBB1_3
    # If the NumbaEnv slot holds 0, report "missing Environment" (.LBB1_2).
    movabsq $_ZN08NumbaEnv8__main__7six1E, %rax
    cmpq $0, (%rax)
    je .LBB1_2
    # PyEval_SaveThread() followed immediately by PyEval_RestoreThread(state).
    movabsq $PyEval_SaveThread, %rax
    callq *%rax
    movabsq $PyEval_RestoreThread, %rcx
    movq %rax, %rdi
    callq *%rcx
    # Tail-call PyLong_FromLongLong(6) to produce the Python result.
    movabsq $PyLong_FromLongLong, %rax
    movl $6, %edi
    popq %rcx
    .cfi_def_cfa_offset 8
    jmpq *%rax
.LBB1_2:
    .cfi_def_cfa_offset 16
    # PyErr_SetString(PyExc_RuntimeError, "missing Environment")
    movabsq $PyExc_RuntimeError, %rdi
    movabsq $".const.missing Environment", %rsi
    movabsq $PyErr_SetString, %rax
    callq *%rax
.LBB1_3:
    # Failure path: return 0 in %rax.
    xorl %eax, %eax
    popq %rcx
    .cfi_def_cfa_offset 8
    retq
.Lfunc_end1:
.size _ZN7cpython8__main__7six1E, .Lfunc_end1-_ZN7cpython8__main__7six1E
.cfi_endproc
.globl cfunc._ZN8__main__7six1E
.p2align 4, 0x90
.type cfunc._ZN8__main__7six1E,@function
cfunc._ZN8__main__7six1E:
    # Return the constant 6 directly in %eax.
    movl $6, %eax
    retq
.Lfunc_end2:
.size cfunc._ZN8__main__7six1E, .Lfunc_end2-cfunc._ZN8__main__7six1E
.type _ZN08NumbaEnv8__main__7six1E,@object
.comm _ZN08NumbaEnv8__main__7six1E,8,8
.type .const.six,@object
.section .rodata,"a",@progbits
.const.six:
.asciz "six"
.size .const.six, 4
.type ".const.missing Environment",@object
.p2align 4
.const.missing Environment:
.asciz "missing Environment"
.size ".const.missing Environment", 20
.section ".note.GNU-stack","",@progbits
在浏览了 [PyData.Numba]: Numba docs 并经过一些调试和反复试验之后,我得出一个结论:看来你的做法偏离了正轨(正如评论中也指出的那样)。
Numba 将 Python 代码(函数)转换为机器代码(原因很明显:速度)。它即时(在运行过程中)完成所有操作(转换、构建、插入到当前进程),程序员只需用例如 @numba.jit 来装饰函数([PyData.Numba]: Just-in-Time compilation)。
您遇到的行为是正确的。Dispatcher 对象(通过装饰 six 函数得到)只为函数本身生成(汇编)代码(其中没有 main,因为代码是在当前进程中执行的,而该进程的 main 是 Python 解释器的 main 函数)。因此,链接器抱怨没有 main 符号是正常的。这就像写一个只包含以下内容的 C 文件:
int six() {
return 6;
}
为了让事情正常工作,您必须:
1. 将 .asm 文件构建成 .o(目标)文件(已完成)
2. 将 #1. 中的 .o 文件放入一个库中,该库可以是:
   - 静态库
   - 动态库
   该库将被链接到(最终的)可执行文件中。此步骤是可选的,因为您可以直接使用 .o 文件
3. 将另一个定义了 main(并调用 six,我认为这就是全部目的)的文件构建成 .o 文件。由于我不太熟悉汇编,所以我用 C 编写了它
4. 将这 2 个实体(来自 #2.(或 #1.)和 #3.)链接在一起
作为替代方案,您可以查看 [PyData.Numba]: Compiling code ahead of time,但请记住,它会生成一个 Python(扩展)模块。
回到当前的问题。我在 Ubuntu 18.04 64bit 上进行了测试。
code00.py:
#!/usr/bin/env python
import sys
import math
import numba
@numba.jit(nopython=True, nogil=True)
def six():
    """Return the constant 6; the function whose generated assembly is dumped."""
    return 6


def main(*argv):
    """Compile ``six`` and write its assembly to a platform-tagged ``.asm`` file.

    The output file name is built from the function name, ``sys.platform``,
    the bit width derived from ``sys.maxsize`` and the first three fields of
    ``sys.version_info``.  ``argv`` is accepted but not used.
    """
    six()  # Call the function(s), otherwise `inspect_asm()` would return empty dict

    speed_funcs = [
        (six, numba.int32()),
    ]
    for func, _ in speed_funcs:
        file_name_asm = "numba_{0:s}_{1:s}_{2:03d}_{3:02d}{4:02d}{5:02d}.asm".format(
            func.__name__,
            sys.platform,
            int(round(math.log2(sys.maxsize))) + 1,
            *sys.version_info[:3]
        )
        asm = func.inspect_asm()
        print("Writing to {0:s}:".format(file_name_asm))
        # One entry per compiled signature; all of them go into the same file.
        with open(file_name_asm, "wb") as fout:
            for k, v in asm.items():
                print(" {0:}".format(k))
                fout.write(v.encode())


if __name__ == "__main__":
    print("Python {0:s} {1:d}bit on {2:s}\n".format(" ".join(item.strip() for item in sys.version.split("\n")), 64 if sys.maxsize > 0x100000000 else 32, sys.platform))
    main(*sys.argv[1:])
    print("\nDone.")
main00.c:
#include <stdio.h>
#include <dlfcn.h>
//#define SYMBOL_SIX "_ZN8__main__7six1E"
#define SYMBOL_SIX "cfunc._ZN8__main__7six1E"
typedef int (*SixFuncPtr)();
// Loads ./libnumba_six_linux.so at run time, looks up SYMBOL_SIX in it and
// prints the value returned by calling that function.
// Returns 0 on success, -1 if the library cannot be loaded, -2 if the symbol
// cannot be found.
int main() {
void *pMod = dlopen("./libnumba_six_linux.so", RTLD_LAZY);
if (!pMod) {
printf("Error (%s) loading module\n", dlerror());
return -1;
}
// The symbol is resolved by its string name via dlsym.
SixFuncPtr pSixFunc = dlsym(pMod, SYMBOL_SIX);
if (!pSixFunc)
{
printf("Error (%s) loading function\n", dlerror());
// Release the library handle before bailing out.
dlclose(pMod);
return -2;
}
printf("six() returned: %d\n", (*pSixFunc)());
dlclose(pMod);
return 0;
}
build.sh:
# Build script: generate the .asm with code00.py, assemble it, link it into a
# shared library, dump the library's symbols and build the main00 executable.
CC=gcc
LIB_BASE_NAME=numba_six_linux
# NOTE(review): FLAG_LD_LIB_NUMBALINUX is defined but not referenced below.
FLAG_LD_LIB_NUMBALINUX="-Wl,-L. -Wl,-l${LIB_BASE_NAME}"
FLAG_LD_LIB_PYTHON="-Wl,-L/usr/lib/python3.7/config-3.7m-x86_64-linux-gnu -Wl,-lpython3.7m"
# Start from a clean state: remove artifacts of previous runs.
rm -f *.asm *.o *.a *.so *.exe
echo Generate .asm
python3 code00.py
echo Assemble
# The _064_030705 suffix is hard-coded here; it must match the file name that
# code00.py writes.
as -o ${LIB_BASE_NAME}.o ${LIB_BASE_NAME}_064_030705.asm
echo Link library
LIB_NUMBA="./lib${LIB_BASE_NAME}.so"
#ar -scr ${LIB_NUMBA} ${LIB_BASE_NAME}.o
${CC} -o ${LIB_NUMBA} -shared ${LIB_BASE_NAME}.o ${FLAG_LD_LIB_PYTHON}
echo Dump library contents
nm -S ${LIB_NUMBA}
#objdump -t ${LIB_NUMBA}
echo Compile and link executable
# main00.c is linked only against libdl; the .so is loaded by path at run time.
${CC} -o main00.exe main00.c -ldl
echo Exit script
输出:
(py_venv_pc064_03.07.05_test0) [cfati@cfati-ubtu-18-064-00:~/Work/Dev/Whosebug/q061678226]> ~/sopr.sh
*** Set shorter prompt to better fit when pasted in Whosebug (or other) pages ***
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py main00.c
[064bit prompt]>
[064bit prompt]> ./build.sh
Generate .asm
Python 3.7.5 (default, Nov 7 2019, 10:50:52) [GCC 8.3.0] 64bit on linux
Writing to numba_six_linux_064_030705.asm:
()
Done.
Assemble
Link library
Dump library contents
0000000000201020 B __bss_start
00000000000008b0 0000000000000006 T cfunc._ZN8__main__7six1E
0000000000201020 0000000000000001 b completed.7698
00000000000008e0 0000000000000014 r .const.missing Environment
00000000000008d0 0000000000000004 r .const.six
w __cxa_finalize
0000000000000730 t deregister_tm_clones
00000000000007c0 t __do_global_dtors_aux
0000000000200e58 t __do_global_dtors_aux_fini_array_entry
0000000000201018 d __dso_handle
0000000000200e60 d _DYNAMIC
0000000000201020 D _edata
0000000000201030 B _end
00000000000008b8 T _fini
0000000000000800 t frame_dummy
0000000000200e50 t __frame_dummy_init_array_entry
0000000000000990 r __FRAME_END__
0000000000201000 d _GLOBAL_OFFSET_TABLE_
w __gmon_start__
00000000000008f4 r __GNU_EH_FRAME_HDR
00000000000006f0 T _init
w _ITM_deregisterTMCloneTable
w _ITM_registerTMCloneTable
U PyArg_UnpackTuple
U PyErr_SetString
U PyEval_RestoreThread
U PyEval_SaveThread
U PyExc_RuntimeError
U PyLong_FromLongLong
0000000000000770 t register_tm_clones
0000000000201020 d __TMC_END__
0000000000201028 0000000000000008 B _ZN08NumbaEnv8__main__7six1E
0000000000000820 0000000000000086 T _ZN7cpython8__main__7six1E
0000000000000810 000000000000000a T _ZN8__main__7six1E
Compile and link executable
Exit script
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py libnumba_six_linux.so main00.c main00.exe numba_six_linux_064_030705.asm numba_six_linux.o
[064bit prompt]>
[064bit prompt]> # Run the executable
[064bit prompt]>
[064bit prompt]> ./main00.exe
six() returned: 6
[064bit prompt]>
同时发布(因为它很重要)numba_six_linux_064_030705.asm:
.text
.file "<string>"
.globl _ZN8__main__7six1E
.p2align 4, 0x90
.type _ZN8__main__7six1E,@function
_ZN8__main__7six1E:
    # Store the constant 6 through the pointer passed in %rdi;
    # %eax is zeroed before returning.
    movq $6, (%rdi)
    xorl %eax, %eax
    retq
.Lfunc_end0:
.size _ZN8__main__7six1E, .Lfunc_end0-_ZN8__main__7six1E
.globl _ZN7cpython8__main__7six1E
.p2align 4, 0x90
.type _ZN7cpython8__main__7six1E,@function
_ZN7cpython8__main__7six1E:
    .cfi_startproc
    pushq %rax
    .cfi_def_cfa_offset 16
    # Call PyArg_UnpackTuple(args, "six", 0, 0); a zero result jumps to .LBB1_3.
    movq %rsi, %rdi
    movabsq $.const.six, %rsi
    movabsq $PyArg_UnpackTuple, %r8
    xorl %edx, %edx
    xorl %ecx, %ecx
    xorl %eax, %eax
    callq *%r8
    testl %eax, %eax
    je .LBB1_3
    # If the NumbaEnv slot holds 0, report "missing Environment" (.LBB1_2).
    movabsq $_ZN08NumbaEnv8__main__7six1E, %rax
    cmpq $0, (%rax)
    je .LBB1_2
    # PyEval_SaveThread() followed immediately by PyEval_RestoreThread(state).
    movabsq $PyEval_SaveThread, %rax
    callq *%rax
    movabsq $PyEval_RestoreThread, %rcx
    movq %rax, %rdi
    callq *%rcx
    # Tail-call PyLong_FromLongLong(6) to produce the Python result.
    movabsq $PyLong_FromLongLong, %rax
    movl $6, %edi
    popq %rcx
    .cfi_def_cfa_offset 8
    jmpq *%rax
.LBB1_2:
    .cfi_def_cfa_offset 16
    # PyErr_SetString(PyExc_RuntimeError, "missing Environment")
    movabsq $PyExc_RuntimeError, %rdi
    movabsq $".const.missing Environment", %rsi
    movabsq $PyErr_SetString, %rax
    callq *%rax
.LBB1_3:
    # Failure path: return 0 in %rax.
    xorl %eax, %eax
    popq %rcx
    .cfi_def_cfa_offset 8
    retq
.Lfunc_end1:
.size _ZN7cpython8__main__7six1E, .Lfunc_end1-_ZN7cpython8__main__7six1E
.cfi_endproc
.globl cfunc._ZN8__main__7six1E
.p2align 4, 0x90
.type cfunc._ZN8__main__7six1E,@function
cfunc._ZN8__main__7six1E:
    # Return the constant 6 directly in %eax.
    movl $6, %eax
    retq
.Lfunc_end2:
.size cfunc._ZN8__main__7six1E, .Lfunc_end2-cfunc._ZN8__main__7six1E
.type _ZN08NumbaEnv8__main__7six1E,@object
.comm _ZN08NumbaEnv8__main__7six1E,8,8
.type .const.six,@object
.section .rodata,"a",@progbits
.const.six:
.asciz "six"
.size .const.six, 4
.type ".const.missing Environment",@object
.p2align 4
".const.missing Environment":
.asciz "missing Environment"
.size ".const.missing Environment", 20
.section ".note.GNU-stack","",@progbits
备注:
numba_six_linux_064_030705.asm(以及派生自它的所有内容)包含 six 函数的代码。实际上,其中有一堆符号(在 OSX 上,也可以使用原生的 otool -T 查看),例如:
1. cfunc._ZN8__main__7six$241E - (C) 函数本身
2. _ZN7cpython8__main__7six$241E - Python 包装器:
   1. 执行 C <=> Python 转换(通过类似 PyArg_UnpackTuple 的 Python API 函数)
   2. 由于 #1.,它需要(依赖于)libpython3.7m
   3. 因此,nopython=True 在这种情况下没有效果
此外,这些符号中的 main 部分并不是指可执行文件的入口点(main 函数),而是指 Python 模块的顶级命名空间 (__main__)。毕竟,这段代码本来就是要从 Python 中运行的。
由于该 C 普通函数的名称中包含一个点 (.),我不能直接从 C 调用它(因为它不是有效的标识符名称),所以我不得不手动加载(.so 和)该函数(dlopen / dlsym),这导致代码比简单地调用函数要多。
我没试过,但我认为对生成的 .asm 文件进行以下(手动)更改会简化工作:
- 在汇编之前,在 .asm 文件中重命名该 C 普通函数(改成类似 __six 的名称,或任何其他不与别的(显式或内部)名称冲突的有效 C 标识符),这样就可以直接从 C 调用该函数
- 删除 Python 包装器 (#2.) 也会同时消除 #2.2.(对 libpython3.7m 的依赖)
更新#0
感谢@PeterCordes,他分享了我遗漏的确切信息 ([GNU.GCC]: Controlling Names Used in Assembler Code),这里有一个更简单的版本。
main01.c:
#include <stdio.h>
extern int six() asm ("cfunc._ZN8__main__7six1E");
// Calls six() (bound by the asm label on its extern declaration) and prints
// the returned value.
int main() {
printf("six() returned: %d\n", six());
}
输出:
[064bit prompt]> # Resume from previous point + main01.c
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py libnumba_six_linux.so main00.c main00.exe main01.c numba_six_linux_064_030705.asm numba_six_linux.o
[064bit prompt]>
[064bit prompt]> ar -scr libnumba_six_linux.a numba_six_linux.o
[064bit prompt]>
[064bit prompt]> gcc -o main01.exe main01.c ./libnumba_six_linux.a -Wl,-L/usr/lib/python3.7/config-3.7m-x86_64-linux-gnu -Wl,-lpython3.7m
[064bit prompt]>
[064bit prompt]> ls
build.sh code00.py libnumba_six_linux.a libnumba_six_linux.so main00.c main00.exe main01.c main01.exe numba_six_linux_064_030705.asm numba_six_linux.o
[064bit prompt]>
[064bit prompt]> ./main01.exe
six() returned: 6
[064bit prompt]>