Linux 系统调用:PTRACE_O_TRACECLONE 导致无限期挂起

Linux syscalls: PTRACE_O_TRACECLONE causes indefinite hanging

我有一个二进制程序,需要拦截它的某个系统调用(本例中为 unlinkat),让该调用什么都不做。下面的代码在只有单个进程时可以正常工作;但是,一旦在 ptrace 选项中加入 PTRACE_O_TRACECLONE,被跟踪进程(tracee)调用 clone 之后,waitpid 就会永远挂起。这几天我翻遍了互联网的各个角落,急得直抓头发,后来甚至去读 strace 的源码,还用 strace 去跟踪另一个 strace,想看看它到底是怎么调用 ptrace 的。

这是源代码——为了便于阅读,我删除了一些内容以使其尽可能精简。

#define _POSIX_C_SOURCE 200112L

// std (i think)
#include <errno.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// linux
#include <sys/ptrace.h>
#include <sys/reg.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

/* ptrace options installed on the tracee with PTRACE_SETOPTIONS.
 * PTRACE_O_TRACESYSGOOD makes syscall-stops report SIGTRAP | 0x80 so they can
 * be told apart from other SIGTRAP stops. The commented-out flags would
 * additionally auto-attach to children created via clone/vfork/fork. */
#define OPTS PTRACE_O_TRACESYSGOOD // | PTRACE_O_TRACECLONE | PTRACE_O_TRACEVFORK | PTRACE_O_TRACEFORK
/* Option flags passed to waitpid(); 0 requests a plain blocking wait. */
#define WOPTS 0

/*
 * The TRACEE. Asks to be traced by its parent, stops itself so the parent can
 * install its ptrace options, then replaces itself with the target program.
 *
 * argc/argv describe the target command line; argv[0] is the program to run.
 *
 * Only returns on failure, always with -1: invalid arguments set errno to
 * EINVAL, any other failure is reported on stderr.
 */
int do_child(int argc, char **argv) {
    if (argc < 1 || argv == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* execvp() needs a NULL-terminated vector; build one from the argc entries. */
    char *args[argc + 1];
    memcpy(args, argv, (size_t)argc * sizeof(char *));
    args[argc] = NULL;

    if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) {
        perror("ptrace(PTRACE_TRACEME)");
        return -1;
    }

    /* Stop before exec so the tracer gets a chance to set its options first. */
    if (raise(SIGSTOP) != 0) {
        perror("raise(SIGSTOP)");
        return -1;
    }

    execvp(args[0], args);

    /* execvp() only returns when it failed. */
    perror(args[0]);
    return -1;
}

/*
 * Picks the signal to hand back to a tracee when resuming it from the stop
 * described by `status`.
 *
 * SIGTRAP-based stops (syscall-stops, PTRACE_EVENT_* stops, the post-exec
 * trap) are ptrace bookkeeping and are suppressed. SIGSTOP is suppressed too:
 * it is what parks a freshly auto-attached tracee, and this simple tracer does
 * not implement job control. Every other signal is forwarded unchanged.
 */
static int signal_to_forward(int status) {
    int sig = WSTOPSIG(status);

    if ((sig & 0x7f) == SIGTRAP || sig == SIGSTOP)
        return 0;
    return sig;
}

/*
 * Resumes `child` and blocks until it reaches its next syscall-stop.
 *
 * The wait covers every tracee (pid -1, __WALL), not just `child`: with
 * PTRACE_O_TRACECLONE/FORK/VFORK the kernel auto-attaches new children and
 * parks them in an initial stop. If nobody resumes them, `child` can block
 * forever waiting on them, and so does the tracer. Such secondary tracees are
 * simply resumed here; their syscalls are not inspected.
 *
 * Signals aimed at a tracee are forwarded on resume instead of being dropped.
 *
 * Returns 0 when `child` is stopped at a syscall boundary, and 1 when `child`
 * has exited, was killed by a signal, or there is nothing left to wait for.
 */
int wait_for_syscall(pid_t child) {
    int resume = 1;   /* nonzero while `child` is stopped and waiting for us */
    int deliver = 0;  /* signal to inject into `child` on its next resume */

    while (1) {
        if (resume) {
            /* If this fails because the tracee is gone, waitpid() below reports it. */
            ptrace(PTRACE_SYSCALL, child, NULL, (void *)(long)deliver);
            resume = 0;
            deliver = 0;
        }

        int status;
        pid_t pid = waitpid(-1, &status, __WALL);
        if (pid == -1) {
            if (errno == EINTR)
                continue;
            return 1; /* e.g. ECHILD: no tracee left */
        }

        if (pid != child) {
            /* Secondary tracee: keep it running so it cannot stall `child`. */
            if (WIFSTOPPED(status))
                ptrace(PTRACE_CONT, pid, NULL, (void *)(long)signal_to_forward(status));
            continue;
        }

        if (WIFEXITED(status) || WIFSIGNALED(status))
            return 1;
        if (!WIFSTOPPED(status))
            continue;

        /* PTRACE_O_TRACESYSGOOD marks syscall-stops with bit 7 of the signal. */
        if (WSTOPSIG(status) == (SIGTRAP | 0x80))
            return 0;

        deliver = signal_to_forward(status);
        resume = 1;
    }
}

/*
 * The TRACER. Supervises the freshly forked tracee `child` and turns every
 * unlinkat() it makes into a no-op that reports success.
 *
 * Each syscall produces two stops: one on entry and one on exit. On entry the
 * syscall number (ORIG_RAX) is replaced with -1 so the kernel skips the call;
 * on exit the return register (RAX) is set to 0 so the caller sees success.
 * Writing RAX at entry alone does not prevent the syscall from running.
 *
 * Uses the x86-64 register offsets from <sys/reg.h>. Only `child` itself is
 * inspected.
 *
 * Returns 0 once the tracee is gone, 1 if tracing could not be set up.
 */
int do_trace(pid_t child) {
    int status;

    /* The tracee stops itself right after PTRACE_TRACEME; wait for that stop. */
    if (waitpid(child, &status, WOPTS) == -1) {
        perror("waitpid");
        return 1;
    }
    if (!WIFSTOPPED(status)) {
        fprintf(stderr, "tracee did not stop before exec\n");
        return 1;
    }
    if (ptrace(PTRACE_SETOPTIONS, child, NULL, (void *)(unsigned long)(OPTS)) == -1) {
        perror("ptrace(PTRACE_SETOPTIONS)");
        return 1;
    }

    while (1) {
        /* Syscall-entry stop. */
        if (wait_for_syscall(child) != 0) {
            break;
        }

        /* PEEKUSER returns the data itself, so errno is the only error signal. */
        errno = 0;
        long nr = ptrace(PTRACE_PEEKUSER, child, (void *)(sizeof(long) * ORIG_RAX), NULL);
        if (nr == -1 && errno != 0) {
            break;
        }

        int suppressed = 0;
        switch (nr) {
#ifdef SYS_clone3
            case SYS_clone3:
#endif
            case SYS_clone:
                fprintf(stderr, "DEBUG: clone detected\n");
                break;
            case SYS_unlinkat:
                fprintf(stderr, "DEBUG: unlinkat detected\n");
                /* Syscall number -1 is invalid, so the kernel skips the call. */
                if (ptrace(PTRACE_POKEUSER, child, (void *)(sizeof(long) * ORIG_RAX), (void *)-1L) == 0) {
                    suppressed = 1;
                }
                break;
            default:
                break;
        }

        /* Matching syscall-exit stop. */
        if (wait_for_syscall(child) != 0) {
            break;
        }
        if (suppressed) {
            /* The skipped call would otherwise fail with ENOSYS; fake success. */
            ptrace(PTRACE_POKEUSER, child, (void *)(sizeof(long) * RAX), (void *)0L);
        }
    }

    return 0;
}

/*
 * Entry point. Forks; the child becomes the traced target program described by
 * argv[1..], the parent supervises it. The exit status is whatever the chosen
 * role returns.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s prog args\n", argv[0]);
        exit(1);
    }

    pid_t child = fork();
    if (child == -1) {
        /* Without this check the tracer would be started on pid -1. */
        perror("fork");
        return 1;
    }
    if (child == 0) {
        return do_child(argc - 1, argv + 1);
    }
    return do_trace(child);
}

先声明一下:我不是 C 开发人员,最近主要写 Python,所以这里很多代码都是从我找到的各种教程里复制粘贴来的,基本上就是随意增删一些乱七八糟的东西,直到 gcc 不再报那么多警告为止。

根据我读到的资料,我怀疑问题出在如何向相关进程发送信号以及如何等待 SIGTRAP 上,只是我对这个层面该怎么处理没有什么直觉。

解决方案是改用 libseccomp:seccomp 过滤器会被 fork/clone 出来的子进程继承,并在 execve 之后继续生效,因此不需要像 ptrace 那样逐个跟踪子进程。

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <seccomp.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>

/*
 * Replaces the current process with the program described by argc/argv
 * (argv[0] is the program to run, looked up via PATH).
 *
 * Only returns on failure, always with -1: invalid arguments set errno to
 * EINVAL, an exec failure is reported on stderr.
 */
int do_child(int argc, char **argv)
{
    if (argc < 1 || argv == NULL)
    {
        errno = EINVAL;
        return -1;
    }

    // execvp() needs a NULL-terminated vector; build one from the argc entries.
    char *args[argc + 1];
    memcpy(args, argv, (size_t)argc * sizeof(char *));
    args[argc] = NULL;

    execvp(args[0], args);

    // execvp() only returns when it failed.
    perror(args[0]);
    return -1;
}

/*
 * Entry point. Installs a seccomp filter that lets every syscall through
 * except unlinkat, which is answered with "success" without being executed,
 * then runs the command line given in argv[1..] under that filter.
 *
 * The target is exec'ed in place rather than in a forked child: the filter
 * survives execve, the caller's shell waits for the real program, and the
 * program's exit status is passed through. The filter context is not released
 * because the process either execs or exits immediately afterwards.
 */
int main(int argc, char **argv)
{
    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s prog args\n", argv[0]);
        exit(1);
    }

    // Default action: allow everything.
    scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
    if (ctx == NULL)
    {
        fprintf(stderr, "seccomp_init failed\n");
        return 1;
    }

    // Single exception: unlinkat is skipped and reports errno 0 (success).
    int rc = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(0), SCMP_SYS(unlinkat), 0);
    if (rc < 0)
    {
        fprintf(stderr, "seccomp_rule_add: %s\n", strerror(-rc));
        return 1;
    }

    // Build the filter and load it into the kernel for this process.
    rc = seccomp_load(ctx);
    if (rc < 0)
    {
        fprintf(stderr, "seccomp_load: %s\n", strerror(-rc));
        return 1;
    }

    return do_child(argc - 1, argv + 1);
}