我是否必须阻塞主线程上的信号才能处理另一个线程上的取消点?
Do I have to block signals on the main thread to handle cancel point on another thread?
当我在专用线程中运行 TCP 服务器时,我注意到信号处理出现了奇怪的行为。我准备了以下 MWE(最小可复现示例;我使用 cerr 来避免调试打印时的竞争条件):
#include <signal.h>
#include <unistd.h>
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
#undef THREAD
/**
 * Scope tracer: prints one line to stderr when an instance is constructed
 * and another when it is destroyed, so the program output shows whether the
 * enclosing scope was left through a normal return.
 */
class RaiiObject
{
public:
    RaiiObject()
    {
        std::cerr << "RaiiObject ctor" << std::endl;
    }

    ~RaiiObject()
    {
        std::cerr << "RaiiObject dtor" << std::endl;
    }
};
/**
 * Signal handler: writes "Signal\n" to stderr with write(2) and returns.
 * The signal number argument is not used.
 */
static void signalHandler(int /*sig*/)
{
    static const char msg[] = "Signal\n";
    // sizeof includes the terminating NUL, which must not be written.
    write(STDERR_FILENO, msg, sizeof msg - 1);
}
/**
 * Adds SIGINT to the calling thread's blocked-signal mask.
 *
 * pthread_sigmask() is used instead of sigprocmask() because this program
 * can be built with a second thread, and POSIX leaves the behaviour of
 * sigprocmask() unspecified in a multi-threaded process.
 */
static void blockSigint()
{
    sigset_t blockset;
    sigemptyset(&blockset);
    sigaddset(&blockset, SIGINT);
    // The only documented failure is EINVAL for an invalid `how`; SIG_BLOCK is valid.
    (void)pthread_sigmask(SIG_BLOCK, &blockset, NULL);
}
/**
 * Registers signalHandler as the action for SIGINT.
 *
 * sa_flags is 0, so SA_RESTART is not requested, and sa_mask is empty, so
 * no extra signals are added to the mask while the handler runs.
 */
static void setSigintHandler()
{
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = signalHandler;
    sigaction(SIGINT, &action, NULL);
}
/**
 * Installs the SIGINT handler, then waits on stdin (fd 0) with pselect()
 * until the call reports something other than "descriptor ready".
 *
 * pselect() is given an empty signal mask, so every signal is unblocked for
 * the duration of each wait. A positive result prints "Select" and waits
 * again; any other result (timeout is impossible here, so in practice an
 * error such as an interrupted call) prints "Select break" and returns.
 */
void runSelect()
{
    setSigintHandler();

    sigset_t waitMask;
    sigemptyset(&waitMask);

    RaiiObject scopeTrace{};

    // The wait loop below is only ever left via return, so this banner and
    // the fd-set initialisation happen exactly once.
    cerr << "Loop iteration" << endl;

    fd_set readFds;
    FD_ZERO(&readFds);
    FD_SET(0, &readFds);

    for (;;) {
        const int ready = pselect(FD_SETSIZE, &readFds, NULL, NULL, NULL, &waitMask);
        if (ready <= 0) {
            cerr << "Select break" << endl;
            return;
        }
        cerr << "Select" << endl;
    }
}
/**
 * Entry point of the example.
 *
 * With THREAD defined, runSelect() is executed on a separate thread and the
 * main thread waits for it to finish; otherwise runSelect() is called
 * directly on the main thread.
 */
int main()
{
    cerr << "Main start" << endl;
#ifdef THREAD
    cerr << "Thread start" << endl;
    // Deliberately left disabled: this is the call whose absence is being
    // demonstrated by the threaded build of this example.
    //blockSigint();
    thread worker{runSelect};
    worker.join();
#else
    runSelect();
#endif
    cerr << "Main exit" << endl;
    return EXIT_SUCCESS;
}
当我编译单线程程序时(#undef THREAD),我可以用 Ctrl-C 正确终止 runSelect() 函数:
Main start
RaiiObject ctor
Loop iteration
^CSignal
Select break
RaiiObject dtor
Main exit
但是当我编译多线程程序时(#define THREAD),它在信号处理程序运行之后挂起:
Main start
RaiiObject ctor
Loop iteration
^CSignal
只有当我用 blockSigint() 阻塞主线程上的信号时,程序才能再次正常工作。
我用 strace -tt -f 检查了程序,我注意到能正常工作的版本使用 pselect6() 和 ERESTARTNOHAND:
14:46:53.543360 write(2, "Loop iteration", 14Loop iteration) = 14
14:46:53.543482 write(2, "\n", 1
) = 1
14:46:53.543586 pselect6(1024, [0], NULL, NULL, NULL, {[], 8}) = ? ERESTARTNOHAND (To be restarted if no handler)
14:46:55.286989 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
14:46:55.287120 write(2, "Signal\n", 7Signal
) = 7
14:46:55.287327 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
14:46:55.287569 write(2, "Select break", 12Select break) = 12
14:46:55.287760 write(2, "\n", 1
但出问题的版本使用 futex():
[pid 3469011] 14:48:37.211792 write(2, "Loop iteration", 14Loop iteration) = 14
[pid 3469011] 14:48:37.211916 write(2, "\n", 1
) = 1
[pid 3469011] 14:48:37.212031 pselect6(1024, [0], NULL, NULL, NULL, {[], 8} <unfinished ...>
[pid 3469010] 14:48:40.046146 <... futex resumed>) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
[pid 3469010] 14:48:40.046256 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
[pid 3469010] 14:48:40.046354 write(2, "Signal\n", 7Signal
) = 7
[pid 3469010] 14:48:40.046588 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
[pid 3469010] 14:48:40.046821 futex(0x7f4e5c16b9d0, FUTEX_WAIT, 3469011, NULL) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
Do I have to block signals on the main thread to handle cancel point on another thread?
您需要仅在那些预期处理信号的线程中允许(取消屏蔽)信号,并在其他线程中阻止它们。
操作系统会把面向进程的信号(process-directed signal)传递给任意一个可以接收它的线程。您的终端产生的 SIGINT 会被发送到前台进程组中的每个进程,由操作系统决定每个进程中的哪个线程接收它。
如果您只有两个线程,其中一个线程在 pselect 中以原子方式取消屏蔽 SIGINT,而另一个线程阻塞了 SIGINT,那么操作系统会将 SIGINT 传递给前者。如果两个线程都能(或都不能)处理 SIGINT,操作系统将选择其中之一。
警告:当两个线程都屏蔽了 INT 时,您的代码可能会“错过”生成的 SIGINT:
time thr1 thr2
---- ---------- ------
0 block(INT) -
1 run thread (awake) <---- SIGINT
3 join() pselect()
4 ... ...
如果信号在 thr2 处于 pselect 之外时到达,操作系统会发现两个线程都阻塞了该信号。在这种情况下,操作系统可以任选一个线程让信号在其上挂起,并且可能选中永远不会解除阻塞的 thr1,这样 SIGINT 就会丢失。
这可能适合您的应用程序,也可能不适合。
如您所见,我的问题在于 sigaction() 把信号处理程序同时安装到了 main() 线程和 runSelect() 线程上,因此 SIGINT 信号可能会被 main() 线程捕获。
现在我准备了一个新版本:只有主线程处理 SIGINT 信号,并通过 pthread_kill() 向指定线程发送 SIGUSR1 信号。
#include <signal.h>
#include <unistd.h>
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
pthread_t nativeHandle;
/**
 * Scope tracer: prints one line to stderr when an instance is constructed
 * and another when it is destroyed, so the program output shows whether the
 * enclosing scope was left through a normal return.
 */
class RaiiObject
{
public:
    RaiiObject()
    {
        std::cerr << "RaiiObject ctor" << std::endl;
    }

    ~RaiiObject()
    {
        std::cerr << "RaiiObject dtor" << std::endl;
    }
};
/**
 * SIGINT handler: writes "INT\n" to stderr, then sends SIGUSR1 to the
 * thread whose handle is stored in the global nativeHandle.
 */
static void sigintHandler(int)
{
    static const char msg[] = "INT\n";
    // sizeof includes the terminating NUL, which must not be written.
    write(STDERR_FILENO, msg, sizeof msg - 1);
    pthread_kill(nativeHandle, SIGUSR1);
}
/**
 * SIGUSR1 handler: writes "USR\n" to stderr and returns. It does nothing
 * else, so its visible effect is the message plus interrupting whatever
 * call the receiving thread was blocked in.
 */
static void sigusrHandler(int)
{
    static const char msg[] = "USR\n";
    // sizeof includes the terminating NUL, which must not be written.
    write(STDERR_FILENO, msg, sizeof msg - 1);
}
/**
 * Adds SIGINT to the calling thread's blocked-signal mask.
 *
 * pthread_sigmask() is used instead of sigprocmask() because this function
 * is called from a secondary thread, and POSIX leaves the behaviour of
 * sigprocmask() unspecified in a multi-threaded process.
 */
static void blockSigint()
{
    sigset_t blockset;
    sigemptyset(&blockset);
    sigaddset(&blockset, SIGINT);
    // The only documented failure is EINVAL for an invalid `how`; SIG_BLOCK is valid.
    (void)pthread_sigmask(SIG_BLOCK, &blockset, NULL);
}
/**
 * Registers sigintHandler as the action for SIGINT.
 *
 * sa_flags is 0, so SA_RESTART is not requested, and sa_mask is empty, so
 * no extra signals are added to the mask while the handler runs.
 */
static void setSigintHandler()
{
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = sigintHandler;
    sigaction(SIGINT, &action, NULL);
}
/**
 * Registers sigusrHandler as the action for SIGUSR1.
 *
 * sa_flags is 0, so SA_RESTART is not requested, and sa_mask is empty, so
 * no extra signals are added to the mask while the handler runs.
 */
static void setSigusrHandler()
{
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = sigusrHandler;
    sigaction(SIGUSR1, &action, NULL);
}
/**
 * Worker-thread body: waits once for stdin (fd 0) to become readable or for
 * a SIGUSR1 wake-up, reports the outcome on stderr, and returns.
 *
 * Signal handling in this thread:
 *  - SIGINT is blocked and stays blocked during the wait, so it is never
 *    handled here. Passing an empty mask to pselect() would unblock it
 *    again for the duration of the wait.
 *  - SIGUSR1 is blocked everywhere except inside pselect(), which unblocks
 *    it atomically. A SIGUSR1 sent before the wait starts therefore stays
 *    pending and interrupts pselect(), instead of being consumed early and
 *    leaving the thread blocked forever.
 *
 * Prints "Select" when pselect() reports a ready descriptor and
 * "Select break" for any other result (for example an interrupted call).
 */
void runSelect()
{
    blockSigint();

    sigset_t wakeSet;
    sigemptyset(&wakeSet);
    sigaddset(&wakeSet, SIGUSR1);

    // waitMask receives the mask in effect before SIGUSR1 was blocked
    // (SIGINT already blocked). It is pre-cleared so it is well defined
    // even if pthread_sigmask() were to fail.
    sigset_t waitMask;
    sigemptyset(&waitMask);
    pthread_sigmask(SIG_BLOCK, &wakeSet, &waitMask);
    sigdelset(&waitMask, SIGUSR1);

    // Installed after SIGUSR1 is blocked, so an early SIGUSR1 stays pending
    // rather than triggering the default action.
    setSigusrHandler();

    RaiiObject scopeTrace{};
    cerr << "Loop iteration" << endl;

    fd_set fdRead;
    FD_ZERO(&fdRead);
    FD_SET(0, &fdRead);

    // Single wait: both outcomes end the function.
    if (pselect(FD_SETSIZE, &fdRead, NULL, NULL, NULL, &waitMask) > 0) {
        cerr << "Select" << endl;
    } else {
        cerr << "Select break" << endl;
    }
}
int main()
{
cerr << "Main start" << endl;
cerr << "Thread start" << endl;
thread runSelectThread{runSelect};
nativeHandle = runSelectThread.native_handle();
setSigintHandler();
runSelectThread.join();
cerr << "Main exit" << endl;
return EXIT_SUCCESS;
}
Main start
Thread start
RaiiObject ctor
Loop iteration
^CINT
USR
Select break
RaiiObject dtor
Main exit
当我在专用线程中运行 TCP 服务器时,我注意到信号处理出现了奇怪的行为。我准备了以下 MWE(最小可复现示例;我使用 cerr 来避免调试打印时的竞争条件):
#include <signal.h>
#include <unistd.h>
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
#undef THREAD
/**
 * Scope tracer: prints one line to stderr when an instance is constructed
 * and another when it is destroyed, so the program output shows whether the
 * enclosing scope was left through a normal return.
 */
class RaiiObject
{
public:
    RaiiObject()
    {
        std::cerr << "RaiiObject ctor" << std::endl;
    }

    ~RaiiObject()
    {
        std::cerr << "RaiiObject dtor" << std::endl;
    }
};
/**
 * Signal handler: writes "Signal\n" to stderr with write(2) and returns.
 * The signal number argument is not used.
 */
static void signalHandler(int /*sig*/)
{
    static const char msg[] = "Signal\n";
    // sizeof includes the terminating NUL, which must not be written.
    write(STDERR_FILENO, msg, sizeof msg - 1);
}
/**
 * Adds SIGINT to the calling thread's blocked-signal mask.
 *
 * pthread_sigmask() is used instead of sigprocmask() because this program
 * can be built with a second thread, and POSIX leaves the behaviour of
 * sigprocmask() unspecified in a multi-threaded process.
 */
static void blockSigint()
{
    sigset_t blockset;
    sigemptyset(&blockset);
    sigaddset(&blockset, SIGINT);
    // The only documented failure is EINVAL for an invalid `how`; SIG_BLOCK is valid.
    (void)pthread_sigmask(SIG_BLOCK, &blockset, NULL);
}
/**
 * Registers signalHandler as the action for SIGINT.
 *
 * sa_flags is 0, so SA_RESTART is not requested, and sa_mask is empty, so
 * no extra signals are added to the mask while the handler runs.
 */
static void setSigintHandler()
{
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = signalHandler;
    sigaction(SIGINT, &action, NULL);
}
/**
 * Installs the SIGINT handler, then waits on stdin (fd 0) with pselect()
 * until the call reports something other than "descriptor ready".
 *
 * pselect() is given an empty signal mask, so every signal is unblocked for
 * the duration of each wait. A positive result prints "Select" and waits
 * again; any other result (timeout is impossible here, so in practice an
 * error such as an interrupted call) prints "Select break" and returns.
 */
void runSelect()
{
    setSigintHandler();

    sigset_t waitMask;
    sigemptyset(&waitMask);

    RaiiObject scopeTrace{};

    // The wait loop below is only ever left via return, so this banner and
    // the fd-set initialisation happen exactly once.
    cerr << "Loop iteration" << endl;

    fd_set readFds;
    FD_ZERO(&readFds);
    FD_SET(0, &readFds);

    for (;;) {
        const int ready = pselect(FD_SETSIZE, &readFds, NULL, NULL, NULL, &waitMask);
        if (ready <= 0) {
            cerr << "Select break" << endl;
            return;
        }
        cerr << "Select" << endl;
    }
}
/**
 * Entry point of the example.
 *
 * With THREAD defined, runSelect() is executed on a separate thread and the
 * main thread waits for it to finish; otherwise runSelect() is called
 * directly on the main thread.
 */
int main()
{
    cerr << "Main start" << endl;
#ifdef THREAD
    cerr << "Thread start" << endl;
    // Deliberately left disabled: this is the call whose absence is being
    // demonstrated by the threaded build of this example.
    //blockSigint();
    thread worker{runSelect};
    worker.join();
#else
    runSelect();
#endif
    cerr << "Main exit" << endl;
    return EXIT_SUCCESS;
}
当我编译单线程程序时(#undef THREAD),我可以用 Ctrl-C 正确终止 runSelect() 函数:
Main start
RaiiObject ctor
Loop iteration
^CSignal
Select break
RaiiObject dtor
Main exit
但是当我编译多线程程序时(#define THREAD),它在信号处理程序运行之后挂起:
Main start
RaiiObject ctor
Loop iteration
^CSignal
只有当我用 blockSigint() 阻塞主线程上的信号时,程序才能再次正常工作。
我用 strace -tt -f 检查了程序,我注意到能正常工作的版本使用 pselect6() 和 ERESTARTNOHAND:
14:46:53.543360 write(2, "Loop iteration", 14Loop iteration) = 14
14:46:53.543482 write(2, "\n", 1
) = 1
14:46:53.543586 pselect6(1024, [0], NULL, NULL, NULL, {[], 8}) = ? ERESTARTNOHAND (To be restarted if no handler)
14:46:55.286989 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
14:46:55.287120 write(2, "Signal\n", 7Signal
) = 7
14:46:55.287327 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
14:46:55.287569 write(2, "Select break", 12Select break) = 12
14:46:55.287760 write(2, "\n", 1
但出问题的版本使用 futex():
[pid 3469011] 14:48:37.211792 write(2, "Loop iteration", 14Loop iteration) = 14
[pid 3469011] 14:48:37.211916 write(2, "\n", 1
) = 1
[pid 3469011] 14:48:37.212031 pselect6(1024, [0], NULL, NULL, NULL, {[], 8} <unfinished ...>
[pid 3469010] 14:48:40.046146 <... futex resumed>) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
[pid 3469010] 14:48:40.046256 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
[pid 3469010] 14:48:40.046354 write(2, "Signal\n", 7Signal
) = 7
[pid 3469010] 14:48:40.046588 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
[pid 3469010] 14:48:40.046821 futex(0x7f4e5c16b9d0, FUTEX_WAIT, 3469011, NULL) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
Do I have to block signals on the main thread to handle cancel point on another thread?
您需要仅在那些预期处理信号的线程中允许(取消屏蔽)信号,并在其他线程中阻止它们。
操作系统会把面向进程的信号(process-directed signal)传递给任意一个可以接收它的线程。您的终端产生的 SIGINT 会被发送到前台进程组中的每个进程,由操作系统决定每个进程中的哪个线程接收它。
如果您只有两个线程,其中一个线程在 pselect 中以原子方式取消屏蔽 SIGINT,而另一个线程阻塞了 SIGINT,那么操作系统会将 SIGINT 传递给前者。如果两个线程都能(或都不能)处理 SIGINT,操作系统将选择其中之一。
警告:当两个线程都屏蔽了 INT 时,您的代码可能会“错过”生成的 SIGINT:
time thr1 thr2
---- ---------- ------
0 block(INT) -
1 run thread (awake) <---- SIGINT
3 join() pselect()
4 ... ...
如果信号在 thr2 处于 pselect 之外时到达,操作系统会发现两个线程都阻塞了该信号。在这种情况下,操作系统可以任选一个线程让信号在其上挂起,并且可能选中永远不会解除阻塞的 thr1,这样 SIGINT 就会丢失。
这可能适合您的应用程序,也可能不适合。
如您所见,我的问题在于 sigaction() 把信号处理程序同时安装到了 main() 线程和 runSelect() 线程上,因此 SIGINT 信号可能会被 main() 线程捕获。
现在我准备了一个新版本:只有主线程处理 SIGINT 信号,并通过 pthread_kill() 向指定线程发送 SIGUSR1 信号。
#include <signal.h>
#include <unistd.h>
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
pthread_t nativeHandle;
/**
 * Scope tracer: prints one line to stderr when an instance is constructed
 * and another when it is destroyed, so the program output shows whether the
 * enclosing scope was left through a normal return.
 */
class RaiiObject
{
public:
    RaiiObject()
    {
        std::cerr << "RaiiObject ctor" << std::endl;
    }

    ~RaiiObject()
    {
        std::cerr << "RaiiObject dtor" << std::endl;
    }
};
/**
 * SIGINT handler: writes "INT\n" to stderr, then sends SIGUSR1 to the
 * thread whose handle is stored in the global nativeHandle.
 */
static void sigintHandler(int)
{
    static const char msg[] = "INT\n";
    // sizeof includes the terminating NUL, which must not be written.
    write(STDERR_FILENO, msg, sizeof msg - 1);
    pthread_kill(nativeHandle, SIGUSR1);
}
/**
 * SIGUSR1 handler: writes "USR\n" to stderr and returns. It does nothing
 * else, so its visible effect is the message plus interrupting whatever
 * call the receiving thread was blocked in.
 */
static void sigusrHandler(int)
{
    static const char msg[] = "USR\n";
    // sizeof includes the terminating NUL, which must not be written.
    write(STDERR_FILENO, msg, sizeof msg - 1);
}
/**
 * Adds SIGINT to the calling thread's blocked-signal mask.
 *
 * pthread_sigmask() is used instead of sigprocmask() because this function
 * is called from a secondary thread, and POSIX leaves the behaviour of
 * sigprocmask() unspecified in a multi-threaded process.
 */
static void blockSigint()
{
    sigset_t blockset;
    sigemptyset(&blockset);
    sigaddset(&blockset, SIGINT);
    // The only documented failure is EINVAL for an invalid `how`; SIG_BLOCK is valid.
    (void)pthread_sigmask(SIG_BLOCK, &blockset, NULL);
}
/**
 * Registers sigintHandler as the action for SIGINT.
 *
 * sa_flags is 0, so SA_RESTART is not requested, and sa_mask is empty, so
 * no extra signals are added to the mask while the handler runs.
 */
static void setSigintHandler()
{
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = sigintHandler;
    sigaction(SIGINT, &action, NULL);
}
/**
 * Registers sigusrHandler as the action for SIGUSR1.
 *
 * sa_flags is 0, so SA_RESTART is not requested, and sa_mask is empty, so
 * no extra signals are added to the mask while the handler runs.
 */
static void setSigusrHandler()
{
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = sigusrHandler;
    sigaction(SIGUSR1, &action, NULL);
}
/**
 * Worker-thread body: waits once for stdin (fd 0) to become readable or for
 * a SIGUSR1 wake-up, reports the outcome on stderr, and returns.
 *
 * Signal handling in this thread:
 *  - SIGINT is blocked and stays blocked during the wait, so it is never
 *    handled here. Passing an empty mask to pselect() would unblock it
 *    again for the duration of the wait.
 *  - SIGUSR1 is blocked everywhere except inside pselect(), which unblocks
 *    it atomically. A SIGUSR1 sent before the wait starts therefore stays
 *    pending and interrupts pselect(), instead of being consumed early and
 *    leaving the thread blocked forever.
 *
 * Prints "Select" when pselect() reports a ready descriptor and
 * "Select break" for any other result (for example an interrupted call).
 */
void runSelect()
{
    blockSigint();

    sigset_t wakeSet;
    sigemptyset(&wakeSet);
    sigaddset(&wakeSet, SIGUSR1);

    // waitMask receives the mask in effect before SIGUSR1 was blocked
    // (SIGINT already blocked). It is pre-cleared so it is well defined
    // even if pthread_sigmask() were to fail.
    sigset_t waitMask;
    sigemptyset(&waitMask);
    pthread_sigmask(SIG_BLOCK, &wakeSet, &waitMask);
    sigdelset(&waitMask, SIGUSR1);

    // Installed after SIGUSR1 is blocked, so an early SIGUSR1 stays pending
    // rather than triggering the default action.
    setSigusrHandler();

    RaiiObject scopeTrace{};
    cerr << "Loop iteration" << endl;

    fd_set fdRead;
    FD_ZERO(&fdRead);
    FD_SET(0, &fdRead);

    // Single wait: both outcomes end the function.
    if (pselect(FD_SETSIZE, &fdRead, NULL, NULL, NULL, &waitMask) > 0) {
        cerr << "Select" << endl;
    } else {
        cerr << "Select break" << endl;
    }
}
int main()
{
cerr << "Main start" << endl;
cerr << "Thread start" << endl;
thread runSelectThread{runSelect};
nativeHandle = runSelectThread.native_handle();
setSigintHandler();
runSelectThread.join();
cerr << "Main exit" << endl;
return EXIT_SUCCESS;
}
Main start
Thread start
RaiiObject ctor
Loop iteration
^CINT
USR
Select break
RaiiObject dtor
Main exit