为什么使用 std::mutex 的函数会对 pthread_key_create 的地址进行空检查?
Why do functions using std::mutex make a null check of the address of pthread_key_create?
以下面这个简单的函数为例,它在由 std::mutex 实现的锁的保护下递增一个整数:
#include <mutex>
std::mutex m;
// Increments i while holding the global mutex m.
// `lock` acquires m when it is constructed and releases it when it goes out
// of scope at the end of the function.
void inc(int& i) {
std::unique_lock<std::mutex> lock(m);
i++;
}
我原本预期它(内联之后)会被直接编译为:调用 m.lock()、递增 i,然后调用 m.unlock()。
然而,查看最近版本的 gcc 和 clang 生成的汇编代码时,我们发现了一处额外的复杂之处。先来看 gcc 的版本:
# inc(int&) as emitted by gcc. rdi holds the address of i (first integer
# argument under the SysV AMD64 convention).
inc(int&):
# Take the address of the __gthrw___pthread_key_create symbol; if it is zero,
# skip all locking and use the non-locking path at .L2.
mov eax, OFFSET FLAT:__gthrw___pthread_key_create(unsigned int*, void (*)(void*))
test rax, rax
je .L2
# Locking path: keep the address of i in rbx across the call.
push rbx
mov rbx, rdi
# Lock the global mutex m.
mov edi, OFFSET FLAT:m
call __gthrw_pthread_mutex_lock(pthread_mutex_t*)
# A nonzero return value is treated as an error and handled at .L10.
test eax, eax
jne .L10
# i++ while the mutex is held.
add DWORD PTR [rbx], 1
# Restore rbx, then unlock m with a tail jump so the unlock routine returns
# directly to inc's caller.
mov edi, OFFSET FLAT:m
pop rbx
jmp __gthrw_pthread_mutex_unlock(pthread_mutex_t*)
.L2:
# The symbol address was zero: increment i directly, with no lock or unlock.
add DWORD PTR [rdi], 1
ret
.L10:
# Pass the lock call's return value (eax) to std::__throw_system_error.
mov edi, eax
call std::__throw_system_error(int)
有趣的是开头几行。汇编代码检查 __gthrw___pthread_key_create 的地址(这是 pthread_key_create 的实现,即用于创建线程本地存储键的函数):如果该地址为零,就跳转到 .L2,在那里只用一条指令完成递增,完全不加锁。
如果该地址不为零,则按预期执行:锁定互斥体,进行递增,然后解锁。
clang 做得更多:它对该函数的地址检查了两次,一次在 lock 之前,一次在 unlock 之前:
# inc(int&) as emitted by clang. rdi holds the address of i (first integer
# argument under the SysV AMD64 convention).
inc(int&): # @inc(int&)
# Save rbx and keep the address of i in it.
push rbx
mov rbx, rdi
# First check: if the address of __pthread_key_create is zero, go to the
# non-locking increment at .LBB0_4.
mov eax, __pthread_key_create
test rax, rax
je .LBB0_4
# Lock the global mutex m; a nonzero return value is handled at .LBB0_6.
mov edi, m
call pthread_mutex_lock
test eax, eax
jne .LBB0_6
# i++ while the mutex is held.
inc dword ptr [rbx]
# Second check of the same address, this time before unlocking: if it is
# zero, return through .LBB0_5 without calling pthread_mutex_unlock.
mov eax, __pthread_key_create
test rax, rax
je .LBB0_5
# Restore rbx, then unlock m with a tail jump.
mov edi, m
pop rbx
jmp pthread_mutex_unlock # TAILCALL
.LBB0_4:
# The address was zero at the first check: increment i without locking.
inc dword ptr [rbx]
.LBB0_5:
# Common exit: restore rbx and return.
pop rbx
ret
.LBB0_6:
# Pass the lock call's return value (eax) to std::__throw_system_error.
mov edi, eax
call std::__throw_system_error(int)
这项检查的目的是什么?
也许是为了支持这样一种情况:目标文件最终被构建成一个不带 pthreads 支持的二进制文件,此时便回退到不加锁的版本?我没能找到任何关于此行为的文档。
你的猜测看起来是正确的。来自 gcc 源存储库 (https://github.com/gcc-mirror/gcc.git) 中的 libgcc/gthr-posix.h
文件:
/* For a program to be multi-threaded the only thing that it certainly must
be using is pthread_create. However, there may be other libraries that
intercept pthread_create with their own definitions to wrap pthreads
functionality for some purpose. In those cases, pthread_create being
defined might not necessarily mean that libpthread is actually linked
in.
For the GNU C library, we can use a known internal name. This is always
available in the ABI, but no other library would define it. That is
ideal, since any public pthread function might be intercepted just as
pthread_create might be. __pthread_key_create is an "internal"
implementation symbol, but it is part of the public exported ABI. Also,
it's among the symbols that the static libpthread.a always links in
whenever pthread_create is used, so there is no danger of a false
negative result in any statically-linked, multi-threaded program.
For others, we choose pthread_cancel as a function that seems unlikely
to be redefined by an interceptor library. The bionic (Android) C
library does not provide pthread_cancel, so we do use pthread_create
there (and interceptor libraries lose). */
#ifdef __GLIBC__
__gthrw2(__gthrw_(__pthread_key_create),
__pthread_key_create,
pthread_key_create)
# define GTHR_ACTIVE_PROXY __gthrw_(__pthread_key_create)
#elif defined (__BIONIC__)
# define GTHR_ACTIVE_PROXY __gthrw_(pthread_create)
#else
# define GTHR_ACTIVE_PROXY __gthrw_(pthread_cancel)
#endif
static inline int
__gthread_active_p (void)
{
static void *const __gthread_active_ptr
= __extension__ (void *) >HR_ACTIVE_PROXY;
return __gthread_active_ptr != 0;
}
在该文件的其余部分,许多 pthread API 的调用都被包裹在对 __gthread_active_p() 函数的检查之中。如果 __gthread_active_p() 返回 0,则什么都不做,直接返回成功。
以下面这个简单的函数为例,它在由 std::mutex 实现的锁的保护下递增一个整数:
#include <mutex>
std::mutex m;
// Increments i while holding the global mutex m.
// `lock` acquires m when it is constructed and releases it when it goes out
// of scope at the end of the function.
void inc(int& i) {
std::unique_lock<std::mutex> lock(m);
i++;
}
我原本预期它(内联之后)会被直接编译为:调用 m.lock()、递增 i,然后调用 m.unlock()。
然而,查看最近版本的 gcc 和 clang 生成的汇编代码时,我们发现了一处额外的复杂之处。先来看 gcc 的版本:
# inc(int&) as emitted by gcc. rdi holds the address of i (first integer
# argument under the SysV AMD64 convention).
inc(int&):
# Take the address of the __gthrw___pthread_key_create symbol; if it is zero,
# skip all locking and use the non-locking path at .L2.
mov eax, OFFSET FLAT:__gthrw___pthread_key_create(unsigned int*, void (*)(void*))
test rax, rax
je .L2
# Locking path: keep the address of i in rbx across the call.
push rbx
mov rbx, rdi
# Lock the global mutex m.
mov edi, OFFSET FLAT:m
call __gthrw_pthread_mutex_lock(pthread_mutex_t*)
# A nonzero return value is treated as an error and handled at .L10.
test eax, eax
jne .L10
# i++ while the mutex is held.
add DWORD PTR [rbx], 1
# Restore rbx, then unlock m with a tail jump so the unlock routine returns
# directly to inc's caller.
mov edi, OFFSET FLAT:m
pop rbx
jmp __gthrw_pthread_mutex_unlock(pthread_mutex_t*)
.L2:
# The symbol address was zero: increment i directly, with no lock or unlock.
add DWORD PTR [rdi], 1
ret
.L10:
# Pass the lock call's return value (eax) to std::__throw_system_error.
mov edi, eax
call std::__throw_system_error(int)
有趣的是开头几行。汇编代码检查 __gthrw___pthread_key_create 的地址(这是 pthread_key_create 的实现,即用于创建线程本地存储键的函数):如果该地址为零,就跳转到 .L2,在那里只用一条指令完成递增,完全不加锁。
如果该地址不为零,则按预期执行:锁定互斥体,进行递增,然后解锁。
clang 做得更多:它对该函数的地址检查了两次,一次在 lock 之前,一次在 unlock 之前:
# inc(int&) as emitted by clang. rdi holds the address of i (first integer
# argument under the SysV AMD64 convention).
inc(int&): # @inc(int&)
# Save rbx and keep the address of i in it.
push rbx
mov rbx, rdi
# First check: if the address of __pthread_key_create is zero, go to the
# non-locking increment at .LBB0_4.
mov eax, __pthread_key_create
test rax, rax
je .LBB0_4
# Lock the global mutex m; a nonzero return value is handled at .LBB0_6.
mov edi, m
call pthread_mutex_lock
test eax, eax
jne .LBB0_6
# i++ while the mutex is held.
inc dword ptr [rbx]
# Second check of the same address, this time before unlocking: if it is
# zero, return through .LBB0_5 without calling pthread_mutex_unlock.
mov eax, __pthread_key_create
test rax, rax
je .LBB0_5
# Restore rbx, then unlock m with a tail jump.
mov edi, m
pop rbx
jmp pthread_mutex_unlock # TAILCALL
.LBB0_4:
# The address was zero at the first check: increment i without locking.
inc dword ptr [rbx]
.LBB0_5:
# Common exit: restore rbx and return.
pop rbx
ret
.LBB0_6:
# Pass the lock call's return value (eax) to std::__throw_system_error.
mov edi, eax
call std::__throw_system_error(int)
这项检查的目的是什么?
也许是为了支持这样一种情况:目标文件最终被构建成一个不带 pthreads 支持的二进制文件,此时便回退到不加锁的版本?我没能找到任何关于此行为的文档。
你的猜测看起来是正确的。来自 gcc 源存储库 (https://github.com/gcc-mirror/gcc.git) 中的 libgcc/gthr-posix.h
文件:
/* For a program to be multi-threaded the only thing that it certainly must
be using is pthread_create. However, there may be other libraries that
intercept pthread_create with their own definitions to wrap pthreads
functionality for some purpose. In those cases, pthread_create being
defined might not necessarily mean that libpthread is actually linked
in.
For the GNU C library, we can use a known internal name. This is always
available in the ABI, but no other library would define it. That is
ideal, since any public pthread function might be intercepted just as
pthread_create might be. __pthread_key_create is an "internal"
implementation symbol, but it is part of the public exported ABI. Also,
it's among the symbols that the static libpthread.a always links in
whenever pthread_create is used, so there is no danger of a false
negative result in any statically-linked, multi-threaded program.
For others, we choose pthread_cancel as a function that seems unlikely
to be redefined by an interceptor library. The bionic (Android) C
library does not provide pthread_cancel, so we do use pthread_create
there (and interceptor libraries lose). */
#ifdef __GLIBC__
__gthrw2(__gthrw_(__pthread_key_create),
__pthread_key_create,
pthread_key_create)
# define GTHR_ACTIVE_PROXY __gthrw_(__pthread_key_create)
#elif defined (__BIONIC__)
# define GTHR_ACTIVE_PROXY __gthrw_(pthread_create)
#else
# define GTHR_ACTIVE_PROXY __gthrw_(pthread_cancel)
#endif
static inline int
__gthread_active_p (void)
{
static void *const __gthread_active_ptr
= __extension__ (void *) >HR_ACTIVE_PROXY;
return __gthread_active_ptr != 0;
}
在该文件的其余部分,许多 pthread API 的调用都被包裹在对 __gthread_active_p() 函数的检查之中。如果 __gthread_active_p() 返回 0,则什么都不做,直接返回成功。