pthread_kill() 从第二个线程调用时给出分段错误

Question

我正在尝试手动中断程序的主线程，当它在 read() 系统调用中被阻塞时。我在第二个线程中通过调用 pthread_kill() 执行此操作，但是发生了分段错误。但是，如果我将对 read() 的调用放在第二个线程中，即不是主线程并从主线程调用 pthread_kill()，那么一切都会按预期工作。

例如，以下代码会导致分段错误：我在第二个线程启动约 2 秒后，从该线程中调用 pthread_kill()。它使用的是主线程的 pthread_t，该值是在主线程中通过调用 pthread_self() 获得的：

示例 1

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <termios.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <string.h>
#include <errno.h>
#include <syslog.h>
#include <unistd.h>
#include <signal.h>


static int fd = 0;
unsigned char buf[255];
static pthread_t s;
/*
 * Handler that main() installs for SIGUSR1. It only prints a fixed message;
 * the signal number `sig` is not used.
 * NOTE(review): printf() is called from signal-handler context here.
 */
void sigHandler(int sig){
    printf("Signal handler called.\n");
}

/*
 * Start routine of the second thread: prints a start message, sleeps for
 * two seconds, then sends SIGUSR1 to the thread whose id is stored in the
 * file-scope variable `s` (set by main() from pthread_self()).
 * `arg` is unused.
 * NOTE(review): the include list of this listing has no <pthread.h>, so
 * pthread_kill() and pthread_t are used without a visible declaration.
 * NOTE(review): the function is declared to return void * but has no
 * return statement.
 */
void * closeFD(void *arg){
    printf("Second thread started.\n");
    sleep(2);
    /* The result is stored in r but never inspected. */
    int r = pthread_kill(s, SIGUSR1);
}

/*
 * Example 1 entry point: installs sigHandler for SIGUSR1, records the main
 * thread's id in `s`, opens /dev/ttyS0, configures it, starts the closeFD
 * thread and then blocks in read() on the main thread.
 * NOTE(review): the parameters are written as (argv, argc), the reverse of
 * the conventional (argc, argv) order; neither is used.
 */
int main(char *argv[], int argc){
    struct termios newtio;
    pthread_t t1;
    /* This local buf shadows the file-scope buf declared above. */
    unsigned char buf[255];
    /* res is declared but not used in this function. */
    void *res;
    /* Only sa_handler is named; the remaining members are zero-initialized. */
    struct sigaction int_handler = {.sa_handler=sigHandler};
    sigaction(SIGUSR1,&int_handler,0);
    /* Remember the main thread's id so closeFD() can signal it. */
    s = pthread_self();
    printf("Process id is: %d.\n", getpid());
    fd = open("/dev/ttyS0", O_RDONLY | O_NOCTTY);
    if (fd != -1){
        bzero(&newtio, sizeof(newtio));
        newtio.c_cflag = B2400 | CS7 | CLOCAL | CREAD ;
        newtio.c_iflag = ICRNL;
        newtio.c_oflag = 0;
        /* Plain assignment: every c_lflag bit except ICANON is set. */
        newtio.c_lflag = ~ICANON;
        newtio.c_cc[VMIN]     = 14;
        tcsetattr(fd,TCSANOW,&newtio);
        /* Return values of tcsetattr/pthread_create/read are not checked. */
        pthread_create(&t1, NULL, closeFD, NULL);
        printf("Reading ..\n");
        /* The main thread blocks here; closeFD() signals it after ~2 s. */
        read(fd,buf,255);
        close(fd);
    }
    /* The second thread is never joined before returning. */
    return 0;
}

以下代码与上面相同，只是改为在第二个线程中（即在 closeFD() 里）调用 read()，并且它按预期工作：第二个线程解除阻塞并终止，主线程等待它退出之后再退出。

示例 2：

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <termios.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <string.h>
#include <errno.h>
#include <syslog.h>
#include <unistd.h>
#include <signal.h>


static int fd = 0;
unsigned char buf[255];
static pthread_t s;
/*
 * Handler that main() installs for SIGUSR1. It only prints a fixed message;
 * the signal number `sig` is not used.
 * NOTE(review): printf() is called from signal-handler context here.
 */
void sigHandler(int sig){
    printf("Signal handler called.\n");
}

/*
 * Start routine of the second thread: prints a start message, then calls
 * read() on the file-scope descriptor `fd` into the file-scope `buf`, and
 * prints "Read interrupted." once read() returns (for whatever reason; the
 * return value is not examined). `arg` is unused.
 * NOTE(review): the function is declared to return void * but has no
 * return statement, while main() passes &res to pthread_join().
 */
void * closeFD(void *arg){
    printf("Second thread started.\n");
    read(fd,buf,255);
    printf("Read interrupted.\n");
}

/*
 * Example 2 entry point: installs sigHandler for SIGUSR1, opens and
 * configures /dev/ttyS0, starts the closeFD thread (which blocks in read()),
 * sleeps two seconds, sends SIGUSR1 to that thread and joins it.
 * NOTE(review): the parameters are written as (argv, argc), the reverse of
 * the conventional (argc, argv) order; neither is used.
 */
int main(char *argv[], int argc){
    struct termios newtio;
    pthread_t t1;
    /* This local buf shadows the file-scope buf and is not used here. */
    unsigned char buf[255];
    /* Receives the thread's exit value from pthread_join(); not read after. */
    void *res;
        /* Only sa_handler is named; the remaining members are zero-initialized. */
        struct sigaction int_handler = {.sa_handler=sigHandler};
        sigaction(SIGUSR1,&int_handler,0);
        /* s is assigned but not used elsewhere in this listing. */
        s = pthread_self();
        printf("Process id is: %d.\n", getpid());
        fd = open("/dev/ttyS0", O_RDONLY | O_NOCTTY);
        if (fd != -1){
            bzero(&newtio, sizeof(newtio));
            newtio.c_cflag = B2400 | CS7 | CLOCAL | CREAD ;
            newtio.c_iflag = ICRNL;
            newtio.c_oflag = 0;
            /* Plain assignment: every c_lflag bit except ICANON is set. */
            newtio.c_lflag = ~ICANON;
            newtio.c_cc[VMIN]     = 14;
            tcsetattr(fd,TCSANOW,&newtio);
            pthread_create(&t1, NULL, closeFD, NULL);
            /* Give the second thread time to reach its read() call. */
            sleep(2);
            /* Signal the second thread; r is stored but never inspected. */
            int r = pthread_kill(t1, SIGUSR1);
            pthread_join(t1, &res);
            close(fd);
        }
    return 0;
}

到目前为止我还没有找到具体的参考资料说明从第二个（同一进程内）终止主线程是非法操作，所以我做错了什么？

更新#1
感谢所有回复的人，不过我要说清楚几点：

我知道在信号处理程序中使用 printf 是不安全的，这是一个有效的观点，但这只是一个示例，而且它不是分段错误的原因：把 printf() 从信号处理程序中拿掉之后仍然会发生分段错误。而示例 2 无论 printf() 是否在信号处理程序中都能正常工作。
我知道发送 SIGUSR 不会终止程序。然而，通过使用 pthread_kill(pthread_t thread, int signal) 它将向线程 thread 发送一个信号并且它将解除阻塞（如果它确实被阻塞）。这是我想要的操作，这是示例 2 中实际发生的情况，这是我的理解应该在两个示例中发生，但在示例 1 中不会发生。
在描述示例 1 时，我在提到对 pthread_kill() 的调用的地方使用了术语 'method'，实际想表达的是 'thread'。

此外，引用自 'Programming with POSIX Threads'，David R. Butenhof，第 6.6.3 节 p217 'pthread_kill':

Within a process, one thread can send a signal to a specific thread (including itself) by calling pthread_kill.

话虽如此，以下示例也给出了分段错误：

示例 3

#include <stdio.h>
#include <string.h>
#include <string.h>
#include <signal.h>



static pthread_t s;
int value = 0;

/*
 * Handler that main() installs for SIGUSR1: sets the file-scope variable
 * `value` to 1. The signal number `sig` is not used.
 * NOTE(review): `value` is a plain int, not volatile sig_atomic_t.
 */
void sigHandler(int sig){
    value = 1;
}


/*
 * Example 3 entry point: single-threaded reproduction. Installs sigHandler
 * for SIGUSR1, prints `value`, sends SIGUSR1 to the calling thread itself
 * via pthread_kill(), then prints `value` again.
 * NOTE(review): the parameters are written as (argv, argc), the reverse of
 * the conventional (argc, argv) order; neither is used.
 * NOTE(review): this listing includes only <stdio.h>, <string.h> (twice) and
 * <signal.h>; there is no <pthread.h> for pthread_t/pthread_self() and no
 * <unistd.h> for getpid().
 */
int main(char *argv[], int argc){

    /* Only sa_handler is named; the remaining members are zero-initialized. */
    struct sigaction int_handler = {.sa_handler=sigHandler};
    sigaction(SIGUSR1,&int_handler,0);
    s = pthread_self();
    printf("The value of 'value' is %d.\n", value);
    printf("Process id is: %d.\n", getpid());
    /* Signal ourselves; r is stored but never inspected. */
    int r = pthread_kill(s, SIGUSR1);
    printf("The value of 'value' is %d.\n", value);
    return 0;
}

如果把对 sigaction() 的调用替换为对 signal() 的（不可移植的）调用，这同样会失败。第三个示例非常简单，而我找不到任何文档明确说明这是非法行为。事实上，上面引用的参考资料表明这是允许的！

Answer 1

您正在使用 printf()，而它不是异步信号安全（async-signal safe）的；并且您没有正确初始化 struct sigaction（特别是信号掩码未定义）。

第三，在安装了处理程序的情况下，发送 SIGUSR1 信号不会、也不应该终止主线程。您只是向它发送了一个信号，仅此而已。

正如 Jens Gustedt 在他对原始问题的评论中提到的，这两个程序都有未定义的行为。因此，我不会尝试猜测到底是哪一部分未定义行为导致了分段错误（在第一个程序中）。

相反，我将向您展示一个工作示例。

出于调试/测试的目的，我喜欢先写一组基于 write(2) 的异步信号安全输出函数：

#define  _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <termios.h>
#include <pthread.h>
#include <errno.h>
#include <time.h>

#define  MYSIGNAL  SIGUSR1

#define  SECONDS   10

/*
 * Write the bytes in the half-open range [p, q) to `descriptor` using
 * write(2), looping until everything has been written.
 *
 * A short write advances p and the loop continues. A failure with errno
 * equal to EINTR, EAGAIN or EWOULDBLOCK is retried.
 *
 * Returns 0 on success (including when the range is empty), EIO if write()
 * returns a value that is neither positive nor -1, or the errno value of
 * the first non-retryable failure.
 */
static int wrstr(const int descriptor, const char *p, const char *const q)
{
    while (p < q) {
        const ssize_t written = write(descriptor, p, (size_t)(q - p));

        if (written > (ssize_t)0) {
            p += written;
            continue;
        }

        /* Zero, or a negative value other than -1, is treated as an I/O error. */
        if (written != (ssize_t)-1)
            return EIO;

        /* written == -1: retry on transient conditions, otherwise report errno. */
        if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
            continue;

        return errno;
    }

    return 0;
}

/*
 * Return a pointer to the terminating NUL byte of the string `s`, i.e. one
 * past its last character. Returns NULL when `s` is NULL.
 *
 * Uses only a plain loop (no library calls), so it can be used from a
 * signal handler.
 */
static const char *ends(const char *s)
{
    if (s)
        while (*s != '\0')   /* stop at the string terminator */
            s++;
    return s;
}

/*
 * Write the NUL-terminated string `p` to standard output via wrstr().
 *
 * errno is saved before and restored after the write, so the caller's errno
 * is left unchanged. A NULL or empty string writes nothing.
 *
 * Returns 0 on success (or when there is nothing to write), otherwise the
 * error code returned by wrstr().
 */
static int wrout(const char *const p)
{
    if (p != NULL && *p != '\0') {
        int saved_errno, result;
        saved_errno = errno;
        result = wrstr(STDOUT_FILENO, p, ends(p));
        errno = saved_errno;
        return result;
    } else
        return 0;
}

/*
 * Write `value` to standard output as a signed decimal number via wrstr().
 *
 * The digits are produced from least to most significant, filling a local
 * buffer backwards from its end. Unlike wrout()/wrerr(), errno is NOT
 * preserved.
 *
 * Returns whatever wrstr() returns (0 on success).
 */
static int wrouti(const int value)
{
    char          buffer[32];
    char         *p = buffer + sizeof buffer;
    unsigned int  u;

    /* Take the magnitude in unsigned arithmetic: this is well defined for
       every int, including INT_MIN, regardless of the width of long. */
    if (value < 0)
        u = 0U - (unsigned int)value;
    else
        u = (unsigned int)value;

    do {
        *(--p) = (char)('0' + (int)(u % 10U));
        u /= 10U;
    } while (u > 0U);

    if (value < 0)
        *(--p) = '-';

    return wrstr(STDOUT_FILENO, p, buffer + sizeof buffer);
}

/*
 * Write the NUL-terminated string `p` to standard error via wrstr().
 *
 * errno is saved before and restored after the write, so the caller's errno
 * is left unchanged. A NULL or empty string writes nothing.
 *
 * Returns 0 on success (or when there is nothing to write), otherwise the
 * error code returned by wrstr().
 */
static int wrerr(const char *const p)
{
    if (p != NULL && *p != '\0') {
        int saved_errno, result;
        saved_errno = errno;
        result = wrstr(STDERR_FILENO, p, ends(p));
        errno = saved_errno;
        return result;
    } else
        return 0;
}

以上函数是异步信号安全的，因此可以在信号处理程序中使用。 wrout() 和 wrerr() 也保留 errno 不变，这很有用。顺便说一下，在信号处理程序中保存和恢复 errno 通常被忽略，尽管我确实相信有一些奇怪的极端情况可能很重要。 wrouti() 只是一个粗略的十进制带符号整数打印机，也是异步信号安全的，但它不会保留 errno 不变。

接下来，让我们定义信号处理程序本身，以及它的安装程序函数。（我喜欢这样做，使 main() 更简单。）

/* Zero until handler() runs; afterwards the number of the signal it caught. */
static volatile sig_atomic_t handled = 0;

/*
 * Signal handler: reports the delivery on standard error through wrerr()
 * and then records the signal number in `handled`.
 */
static void handler(int signum)
{
    (void)wrerr("Signal received.\n");
    handled = signum;
}

/*
 * Install handler() as the action for signal `signum`, with an empty set of
 * additionally blocked signals and no flags.
 *
 * Returns 0 on success, or the errno value left by sigaction() on failure.
 */
static int install_handler(const int signum)
{
    struct sigaction act;

    /* memset(&act, 0, sizeof act); */
    act.sa_flags = 0;
    act.sa_handler = handler;
    sigemptyset(&act.sa_mask);   /* block no extra signals while handling */

    return (sigaction(signum, &act, NULL) != 0) ? errno : 0;
}

建议使用注释掉的 memset，但不是正常操作所必需的。然而，sigemptyset() 是清除阻塞信号集所必需的。

接下来我们来看线程函数。您不应该使用 sleep()，因为它会与信号交互；使用 POSIX.1-2001 nanosleep() 代替。

/*
 * Worker thread start routine.
 *
 * Sleeps for SECONDS seconds, resuming the sleep with the remaining time
 * whenever nanosleep() is interrupted by a signal. Afterwards, if `target`
 * is non-NULL it is taken to point to a pthread_t, and MYSIGNAL is sent to
 * that thread with pthread_kill(). Progress and errors are reported on
 * standard output through wrout()/wrouti().
 *
 * Always returns NULL.
 */
static void *worker(void *target)
{
    struct timespec duration, left;
    int retval;

    wrout("Worker started. Sleeping ");
    wrouti((int)SECONDS);
    wrout(" seconds...\n");

    duration.tv_sec = SECONDS;
    duration.tv_nsec = 0;
    left.tv_sec = 0;
    left.tv_nsec = 0;

    while (1) {
        retval = nanosleep(&duration, &left);
        if (retval == 0)
            break;

        /* Only an interruption by a signal yields a remaining time worth
           retrying with; any other failure ends the sleep. */
        if (errno != EINTR)
            break;

        /* Stop only when no time is left. A remainder of less than one
           second (tv_sec == 0, tv_nsec > 0) must still be slept. */
        if (left.tv_sec < 0 ||
            (left.tv_sec == 0 && left.tv_nsec <= 0))
            break;

        duration = left;
        left.tv_sec = 0;
        left.tv_nsec = 0;
    }

    wrout("Sleep complete.\n");

    if (target) {
        wrout("Sending signal...\n");

        /* pthread_kill() reports failure through its return value. */
        retval = pthread_kill(*(pthread_t *)target, MYSIGNAL);
        if (retval == 0)
            wrout("Signal sent successfully.\n");
        else {
            const char *const errmsg = strerror(retval);
            wrout("Failed to send signal: ");
            wrout(errmsg);
            wrout(".\n");
        }
    }

    wrout("Thread done.\n");
    return NULL;
}

传给线程函数的指针应指向信号要发往的线程的标识符 (pthread_t)。

请注意，如果信号传送到此特定线程或被该特定线程捕获，则 nanosleep() 可能会被信号传送中断。如果发生这种情况，nanosleep() 会告诉我们还剩多少时间可以睡觉。上面的循环显示了如何确保您至少在指定时间休眠，即使被信号传输打断也是如此。

最后是main()。我没有打开特定设备，而是使用标准输入。要重现 OP 的程序，请在执行时从 /dev/ttyUSB0 重定向标准输入，即 ./program < /dev/ttyUSB0。

/*
 * Demonstrates interrupting a blocking read() in the main thread with a
 * signal sent from a worker thread.
 *
 * Steps: verify that standard input is a terminal, save its settings and
 * switch it to non-canonical mode with VMIN = 14, install the MYSIGNAL
 * handler, start the worker (which signals this thread after its sleep),
 * then read standard input until the handler has run, end of input, or an
 * error. Finally the worker is joined and the terminal settings restored.
 *
 * Returns EXIT_SUCCESS after a normal run, EXIT_FAILURE if setup fails.
 * Once the terminal has been modified, every exit path restores it.
 */
int main(void)
{
    pthread_t main_thread, worker_thread;
    pthread_attr_t attrs;
    struct termios original, settings;
    int result;

    if (!isatty(STDIN_FILENO)) {
        wrerr("Standard input is not a terminal.\n");
        return EXIT_FAILURE;
    }

    if (tcgetattr(STDIN_FILENO, &original) != 0 ||
        tcgetattr(STDIN_FILENO, &settings) != 0) {
        const char *const errmsg = strerror(errno);
        wrerr("Cannot get terminal settings: ");
        wrerr(errmsg);
        wrerr(".\n");
        return EXIT_FAILURE;
    }

    /* Clear only the ICANON bit. Assigning ~ICANON would instead switch ON
       every other local-mode flag. */
    settings.c_lflag &= ~(tcflag_t)ICANON;
    /* Block until 14 bytes are available, with no inter-byte timeout. */
    settings.c_cc[VMIN] = 14;
    settings.c_cc[VTIME] = 0;

    if (tcsetattr(STDIN_FILENO, TCSANOW, &settings) != 0) {
        const char *const errmsg = strerror(errno);
        tcsetattr(STDIN_FILENO, TCSAFLUSH, &original);
        wrerr("Cannot set terminal settings: ");
        wrerr(errmsg);
        wrerr(".\n");
        return EXIT_FAILURE;
    }

    wrout("Terminal is now in raw mode.\n");

    /* install_handler() returns the error number itself. */
    result = install_handler(MYSIGNAL);
    if (result != 0) {
        const char *const errmsg = strerror(result);
        tcsetattr(STDIN_FILENO, TCSAFLUSH, &original);
        wrerr("Cannot install signal handler: ");
        wrerr(errmsg);
        wrerr(".\n");
        return EXIT_FAILURE;
    }

    main_thread = pthread_self();

    pthread_attr_init(&attrs);
    pthread_attr_setstacksize(&attrs, 65536);

    /* pthread functions return the error number; they do not set errno. */
    result = pthread_create(&worker_thread, &attrs, worker, &main_thread);
    if (result != 0) {
        const char *const errmsg = strerror(result);
        pthread_attr_destroy(&attrs);
        tcsetattr(STDIN_FILENO, TCSAFLUSH, &original);
        wrerr("Cannot create a worker thread: ");
        wrerr(errmsg);
        wrerr(".\n");
        return EXIT_FAILURE;
    }

    pthread_attr_destroy(&attrs);

    wrout("Waiting for input...\n");

    while (1) {
        char    buffer[256];
        ssize_t n;

        /* Set by the signal handler; checked before each read(). */
        if (handled) {
            wrout("Because signal was received, no more input is read.\n");
            break;
        }

        n = read(STDIN_FILENO, buffer, sizeof buffer);
        if (n > (ssize_t)0) {
            wrout("Read ");
            wrouti((int)n);
            wrout(" bytes.\n");
            continue;

        } else
        if (n == (ssize_t)0) {
            wrout("End of input.\n");
            break;

        } else
        if (n != (ssize_t)-1) {
            wrout("read() returned an invalid value.\n");
            break;

        } else {
            /* Copy errno first: the output calls below may change it. */
            result = errno;

            wrout("read() == -1, errno == ");
            wrouti(result);
            wrout(": ");
            wrout(strerror(result));
            wrout(".\n");
            break;
        }
    }

    wrout("Reaping the worker thread..\n");
    result = pthread_join(worker_thread, NULL);
    if (result != 0) {
        wrout("Failed to reap worker thread: ");
        wrout(strerror(result));
        wrout(".\n");
    } else
        wrout("Worker thread reaped successfully.\n");

    tcsetattr(STDIN_FILENO, TCSAFLUSH, &original);
    wrout("Terminal reverted back to original mode.\n");

    return EXIT_SUCCESS;
}

因为使用终端进行测试更有趣，所以上面的代码会尽力在返回之前将终端恢复到原始状态。

请注意，由于 termios 结构中的 VMIN 字段设置为 14，因此 read() 会阻塞，直到缓冲区中至少有 14 个字节可用。如果有信号被传送，且缓冲区中至少已有一个字节，则会返回一个短计数（short count）。因此，您不能指望 read() 总是返回 14 个字节，也不能指望它在每次传送信号时都返回 -1 且 errno == EINTR！亲自试用这个程序非常有助于把这些情况弄清楚。

我不记得 Linux 中的 USB 串行驱动程序是否会产生 EPIPE 或引发 SIGPIPE，但在使用管道时这肯定会发生。使用管道时，最常见的原因是在 read 已经返回零（输入结束）之后仍尝试读取。除非该信号被忽略或被信号处理程序捕获，否则进程会像发生分段错误那样死掉，只不过原因是 SIGPIPE 信号而不是 SIGSEGV。我记得对于类似终端的字符设备，这取决于驱动程序。

最后，我是在身体不适（流感）的情况下写的上面的代码，所以其中可能会有错误。它应该是符合 POSIX.1 的 C99 代码，gcc -Wall -pedantic 不会报警告，不过我现在头昏脑胀，所以不做任何保证。非常欢迎修复！

有问题吗？评论？

Answer 2

您忘记了 #include <pthread.h>。这在最近的 Linux 系统上为我修复了示例 #3 中的段错误。

--- pthread_kill-self.c.orig    2015-01-06 14:08:54.949000690 -0600
+++ pthread_kill-self.c 2015-01-06 14:08:59.820998965 -0600
@@ -1,6 +1,6 @@
 #include <stdio.h>
 #include <string.h>
-#include <string.h>
+#include <pthread.h>
 #include <signal.h>

然后...

$:- gcc -o pthread_kill-self pthread_kill-self.c -pthread
$:- ./pthread_kill-self 
The value of 'value' is 0.
Process id is: 3152.
The value of 'value' is 1.

pthread_kill() 从第二个线程调用时给出分段错误

pthread_kill() gives segmentation fault when called from second thread

c

pthreads