我是否必须阻塞主线程上的信号才能处理另一个线程上的取消点?

Do I have to block signals on the main thread to handle cancel point on another thread?

当我开发一个运行在专用线程中的 TCP 服务器时,我注意到信号处理中有奇怪的行为。我准备了以下最小可复现示例(MWE)(我使用 cerr 来避免调试打印时的竞争条件):

#include <signal.h>
#include <unistd.h>

#include <iostream>
#include <thread>
#include <chrono>

using namespace std;

// Build-time switch: THREAD is left undefined here, so main() calls runSelect() directly.
// Replace this line with `#define THREAD` to make main() run runSelect() on a separate thread.
#undef THREAD

/// Tracing helper: reports its own construction and destruction on stderr, which makes it
/// visible in the program output whether the enclosing scope was left in an orderly way.
class RaiiObject
{
public:
    /// Prints "RaiiObject ctor" when the object is created.
    RaiiObject()
    {
        std::cerr << "RaiiObject ctor" << std::endl;
    }

    /// Prints "RaiiObject dtor" when the object is destroyed.
    ~RaiiObject()
    {
        std::cerr << "RaiiObject dtor" << std::endl;
    }
};

/// Signal handler: emits the line "Signal" on stderr (fd 2) with a single write() call.
/// The signal number is not used.
static void signalHandler(int /*sig*/)
{
    static const char kNotice[] = "Signal\n";
    // sizeof counts the terminating NUL, which must not be written.
    write(STDERR_FILENO, kNotice, sizeof(kNotice) - 1);
}

/**
 * Adds SIGINT to the signal mask of the calling thread.
 *
 * pthread_sigmask() is used instead of sigprocmask() because this program can be built with
 * a second thread, and POSIX leaves sigprocmask() unspecified in a multithreaded process.
 * Other signals already present in the mask are left untouched (SIG_BLOCK only adds).
 */
static void blockSigint()
{
    sigset_t sigintOnly;

    sigemptyset(&sigintOnly);
    sigaddset(&sigintOnly, SIGINT);
    pthread_sigmask(SIG_BLOCK, &sigintOnly, nullptr);
}

/**
 * Installs signalHandler() as the handler for SIGINT.
 *
 * The sigaction structure is value-initialised first so that any member this function does
 * not set explicitly is zero rather than indeterminate when it is handed to sigaction().
 * sa_flags is 0 (in particular SA_RESTART is not requested) and no additional signals are
 * blocked while the handler runs.
 */
static void setSigintHandler()
{
    struct sigaction action{};
    action.sa_handler = signalHandler;
    action.sa_flags = 0;
    sigemptyset(&action.sa_mask);
    sigaction(SIGINT, &action, nullptr);
}

/**
 * Installs the SIGINT handler and then waits on stdin (fd 0) with pselect().
 *
 * Every successful wait prints "Select" and waits again; the first wait that does not report
 * a ready descriptor (error or interruption by a signal) prints "Select break" and returns.
 * A RaiiObject lives for the duration of the call so its destructor output shows that the
 * function was left normally.
 */
void runSelect()
{
    // Mask applied by pselect() for the duration of each wait: no signal is blocked.
    sigset_t noneBlocked;
    sigemptyset(&noneBlocked);

    setSigintHandler();

    RaiiObject scopeTracer{};

    // Announced and armed exactly once: the wait loop below is only ever left by returning.
    std::cerr << "Loop iteration" << std::endl;
    fd_set readable;
    FD_ZERO(&readable);
    FD_SET(0, &readable);

    for (;;) {
        const int ready = pselect(FD_SETSIZE, &readable, nullptr, nullptr, nullptr, &noneBlocked);
        if (ready <= 0) {
            std::cerr << "Select break" << std::endl;
            return;
        }
        std::cerr << "Select" << std::endl;
    }
}

/**
 * Entry point. Runs runSelect() directly, or - when THREAD is defined - on a separate
 * thread that is joined right away, logging start and exit on stderr.
 */
int main()
{
    std::cerr << "Main start" << std::endl;

#ifdef THREAD
    std::cerr << "Thread start" << std::endl;
    // Enable the next line to block SIGINT in the main thread before the worker is started.
    //blockSigint();
    std::thread worker(runSelect);
    worker.join();
#else
    runSelect();
#endif

    std::cerr << "Main exit" << std::endl;

    return EXIT_SUCCESS;
}

当我编译单线程程序时(#undef THREAD),我可以用Ctrl-C正确终止runSelect()函数:

Main start
RaiiObject ctor
Loop iteration
^CSignal
Select break
RaiiObject dtor
Main exit

但是当我编译多线程版本(#define THREAD)时,程序在信号处理程序运行之后挂起:

Main start
RaiiObject ctor
Loop iteration
^CSignal

只有当我用 blockSigint() 阻塞主线程上的信号时,程序才能再次正常工作。

我用 strace -tt -f 检查了程序,注意到在正常工作的版本中,被信号中断的是 pselect6(),它返回 ERESTARTNOHAND:

14:46:53.543360 write(2, "Loop iteration", 14Loop iteration) = 14
14:46:53.543482 write(2, "\n", 1
)       = 1
14:46:53.543586 pselect6(1024, [0], NULL, NULL, NULL, {[], 8}) = ? ERESTARTNOHAND (To be restarted if no handler)
14:46:55.286989 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
14:46:55.287120 write(2, "Signal\n", 7Signal
) = 7
14:46:55.287327 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
14:46:55.287569 write(2, "Select break", 12Select break) = 12
14:46:55.287760 write(2, "\n", 1

但在出问题的版本中,被信号中断的却是 futex():

[pid 3469011] 14:48:37.211792 write(2, "Loop iteration", 14Loop iteration) = 14
[pid 3469011] 14:48:37.211916 write(2, "\n", 1
) = 1
[pid 3469011] 14:48:37.212031 pselect6(1024, [0], NULL, NULL, NULL, {[], 8} <unfinished ...>
[pid 3469010] 14:48:40.046146 <... futex resumed>) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
[pid 3469010] 14:48:40.046256 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
[pid 3469010] 14:48:40.046354 write(2, "Signal\n", 7Signal
) = 7
[pid 3469010] 14:48:40.046588 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
[pid 3469010] 14:48:40.046821 futex(0x7f4e5c16b9d0, FUTEX_WAIT, 3469011, NULL) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)

Do I have to block signals on the main thread to handle cancel point on another thread?

您需要仅在那些预期处理信号的线程中允许(取消屏蔽)信号,并在其他线程中阻止它们。

操作系统会把发往进程的信号(process-directed signal)传递给任意一个能够接收它的线程。您的终端产生的 SIGINT 会被发送到前台进程组中的每个进程,由操作系统决定每个进程中的哪个线程接收它。

如果您只有两个线程,其中一个线程在 pselect 中以原子方式取消屏蔽 SIGINT,而另一个线程阻塞了 SIGINT,那么操作系统会将 SIGINT 传递给前者。如果两个线程都能(或都不能)处理 SIGINT,操作系统将任选其一。

警告:当两个线程都屏蔽了 INT 时,您的代码可能会“错过”生成的 SIGINT:

time  thr1        thr2
----  ----------  ------
  0   block(INT)   - 
  1   run thread  (awake)    <---- SIGINT
  3   join()      pselect()
  4   ...         ...

如果信号到达时 thr2 不在 pselect 中,操作系统会发现两个线程都阻塞了该信号。在这种情况下,操作系统可以任意选择一个线程,让信号在其上挂起(pending),而它可能选中永远不会解除阻塞的 thr1,于是 SIGINT 就丢失了。

这可能适合您的应用程序,也可能不适合。

如您所见,我的问题在于 sigaction() 安装的信号处理程序对 main() 线程和 runSelect() 线程同时生效,因此 SIGINT 信号可能会被 main() 线程捕获。

现在我准备了一个新版本:只有主线程处理 SIGINT 信号,并通过 pthread_kill() 向指定线程发送 SIGUSR1 信号:

#include <signal.h>
#include <unistd.h>

#include <iostream>
#include <thread>
#include <chrono>

using namespace std;

// Native pthread handle of the thread running runSelect(). main() stores it right after
// starting that thread; sigintHandler() reads it to direct SIGUSR1 at that thread.
pthread_t nativeHandle;

/// Scope tracer: writes one line to stderr on construction and one on destruction, so the
/// program output shows whether the owning scope was unwound.
class RaiiObject
{
public:
    /// Emits "RaiiObject ctor".
    RaiiObject()
    {
        std::cerr << "RaiiObject ctor" << std::endl;
    }

    /// Emits "RaiiObject dtor".
    ~RaiiObject()
    {
        std::cerr << "RaiiObject dtor" << std::endl;
    }
};

/// SIGINT handler: writes "INT" to stderr (fd 2), then forwards the event as SIGUSR1 to the
/// thread identified by nativeHandle. The signal number itself is not used.
static void sigintHandler(int /*signo*/)
{
    static const char kTag[] = "INT\n";
    // sizeof counts the terminating NUL, which must not be written.
    write(STDERR_FILENO, kTag, sizeof(kTag) - 1);
    pthread_kill(nativeHandle, SIGUSR1);
}

/// Signal handler: writes the line "USR" to stderr (fd 2). The signal number is not used.
static void sigusrHandler(int /*signo*/)
{
    static const char kTag[] = "USR\n";
    // sizeof counts the terminating NUL, which must not be written.
    write(STDERR_FILENO, kTag, sizeof(kTag) - 1);
}

/**
 * Adds SIGINT to the signal mask of the calling thread.
 *
 * This is called from the thread running runSelect(), i.e. in a multithreaded process, where
 * POSIX leaves the behaviour of sigprocmask() unspecified; pthread_sigmask() is the interface
 * defined for changing a single thread's mask. Other signals already present in the mask are
 * left untouched (SIG_BLOCK only adds).
 */
static void blockSigint()
{
    sigset_t sigintOnly;

    sigemptyset(&sigintOnly);
    sigaddset(&sigintOnly, SIGINT);
    pthread_sigmask(SIG_BLOCK, &sigintOnly, nullptr);
}

/**
 * Installs sigintHandler() as the handler for SIGINT.
 *
 * The sigaction structure is value-initialised first so that any member this function does
 * not set explicitly is zero rather than indeterminate when it is handed to sigaction().
 * sa_flags is 0 and no additional signals are blocked while the handler runs.
 */
static void setSigintHandler()
{
    struct sigaction action{};
    action.sa_handler = sigintHandler;
    action.sa_flags = 0;
    sigemptyset(&action.sa_mask);
    sigaction(SIGINT, &action, nullptr);
}

/**
 * Installs sigusrHandler() as the handler for SIGUSR1.
 *
 * The sigaction structure is value-initialised first so that any member this function does
 * not set explicitly is zero rather than indeterminate when it is handed to sigaction().
 * sa_flags is 0 (in particular SA_RESTART is not requested) and no additional signals are
 * blocked while the handler runs.
 */
static void setSigusrHandler()
{
    struct sigaction action{};
    action.sa_handler = sigusrHandler;
    action.sa_flags = 0;
    sigemptyset(&action.sa_mask);
    sigaction(SIGUSR1, &action, nullptr);
}

/**
 * Thread body: performs a single pselect() wait on stdin (fd 0) that SIGUSR1 can interrupt.
 *
 * Signal handling in this thread:
 *  - SIGINT stays blocked the whole time, including inside pselect(), so it is never
 *    delivered to this thread.
 *  - SIGUSR1 is blocked everywhere except inside pselect(). A SIGUSR1 sent before the wait
 *    has started therefore stays pending and interrupts the wait as soon as it begins,
 *    instead of being handled early and leaving pselect() blocked with nothing to wake it.
 *
 * Prints "Select" if a descriptor became ready, "Select break" otherwise (error or
 * interruption by a signal), and returns in both cases.
 */
void runSelect()
{
    blockSigint();

    sigset_t usr1Only;
    sigemptyset(&usr1Only);
    sigaddset(&usr1Only, SIGUSR1);

    // Block SIGUSR1 and fetch the mask that was in effect before (SIGINT is already in it).
    // That previous mask, with SIGUSR1 explicitly removed, is what pselect() installs
    // atomically while waiting.
    sigset_t waitMask;
    sigemptyset(&waitMask);
    pthread_sigmask(SIG_BLOCK, &usr1Only, &waitMask);
    sigdelset(&waitMask, SIGUSR1);

    setSigusrHandler();

    RaiiObject scopeTracer{};

    std::cerr << "Loop iteration" << std::endl;

    fd_set readable;
    FD_ZERO(&readable);
    FD_SET(0, &readable);

    // Both outcomes end the function, so a single wait is all that is ever performed.
    if (pselect(FD_SETSIZE, &readable, nullptr, nullptr, nullptr, &waitMask) > 0) {
        std::cerr << "Select" << std::endl;
    } else {
        std::cerr << "Select break" << std::endl;
    }
}

/**
 * Entry point. Starts runSelect() on a worker thread, publishes that thread's native handle,
 * installs the SIGINT handler and waits for the worker to finish.
 */
int main()
{
    std::cerr << "Main start" << std::endl;

    std::cerr << "Thread start" << std::endl;
    std::thread worker(runSelect);

    // The handle has to be stored before the SIGINT handler is installed, because
    // sigintHandler() reads nativeHandle to pick the target of pthread_kill().
    nativeHandle = worker.native_handle();
    setSigintHandler();

    worker.join();

    std::cerr << "Main exit" << std::endl;

    return EXIT_SUCCESS;
}
Main start
Thread start
RaiiObject ctor
Loop iteration
^CINT
USR
Select break
RaiiObject dtor
Main exit