无法进入由 setuid 进程创建的挂载命名空间

Can't enter mount namespace created by a setuid process

一个属主为 root、设置了 setuid 位的守护进程先切换回真实用户,然后创建一个挂载命名空间。

另一个属主为普通用户、设置了 CAP_SYS_ADMIN 和 CAP_SYS_CHROOT 能力的可执行文件试图进入该命名空间,但失败了。

daemon.c:

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/capability.h>
#include <sys/prctl.h>
#include <unistd.h>

/* If `ok` is false, report the errno of the call that just failed and exit(1). */
static void die_unless(int ok)
{
  if (!ok) {
    perror(NULL);
    exit(1);
  }
}

/*
 * Setuid-root daemon used by the reproduction.
 *
 * Steps:
 *   1. Ask the kernel to keep the permitted capability set across the
 *      upcoming UID change (PR_SET_KEEPCAPS).
 *   2. Drop back to the real group and then the real user.
 *   3. Reduce the capability sets to just CAP_SYS_ADMIN (permitted and
 *      effective), which unshare(CLONE_NEWNS) needs.
 *   4. Create a new mount namespace and sleep until signalled, so another
 *      process can try to join the namespace.
 *
 * Any failing step prints the errno message and exits with status 1.
 */
int main(int argc, const char* argv[])
{
  (void)argc;
  (void)argv;

  die_unless(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != -1);

  /*
   * Drop the group before the user: once the effective UID is no longer
   * root the process may not be allowed to change its GIDs any more.
   */
  die_unless(setgid(getgid()) != -1);
  die_unless(setuid(getuid()) != -1);

  /*
   * cap_init() yields an empty capability state; raise only CAP_SYS_ADMIN
   * in it. KEEPCAPS preserved the permitted set across setuid(), but the
   * effective set was cleared, so it has to be raised again explicitly.
   */
  cap_t caps = cap_init();
  die_unless(caps != NULL);

  cap_value_t wanted[] = {CAP_SYS_ADMIN};
  const int wanted_count = (int)(sizeof(wanted) / sizeof(wanted[0]));

  die_unless(cap_set_flag(caps, CAP_EFFECTIVE, wanted_count, wanted, CAP_SET) != -1);
  die_unless(cap_set_flag(caps, CAP_PERMITTED, wanted_count, wanted, CAP_SET) != -1);
  die_unless(cap_set_proc(caps) != -1);
  die_unless(cap_free(caps) != -1);

  die_unless(unshare(CLONE_NEWNS) != -1);

  /* Stay alive so the namespace remains reachable through this PID. */
  pause();

  return 0;
}

client.c:

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Try to join the mount namespace of another process.
 *
 * Usage: client <pid>
 *
 * Opens a pidfd for <pid> and passes it to setns(CLONE_NEWNS).
 * Returns 0 on success; on any failure prints a diagnostic to stderr and
 * exits with status 1.
 */
int main(int argc, const char* argv[])
{
  if (argc != 2) {
    /* errno is not meaningful here, so print a usage line instead of perror(). */
    fprintf(stderr, "usage: %s <pid>\n", argc > 0 ? argv[0] : "client");
    exit(1);
  }

  /* Parse the PID strictly: atoi() would silently turn garbage into 0. */
  errno = 0;
  char *end = NULL;
  const long parsed = strtol(argv[1], &end, 10);
  if (errno != 0 || end == argv[1] || *end != '\0' ||
      parsed <= 0 || parsed != (long)(pid_t)parsed) {
    fprintf(stderr, "invalid pid: %s\n", argv[1]);
    exit(1);
  }

  /* Call the raw syscall rather than relying on a libc wrapper for pidfd_open. */
  const int fd = (int)syscall(SYS_pidfd_open, (pid_t)parsed, 0);
  if (fd == -1) {
    perror("pidfd_open");
    exit(1);
  }

  if (setns(fd, CLONE_NEWNS) == -1) {
    perror("setns");
    exit(1);
  }

  /* The pidfd is no longer needed once the namespace has been joined. */
  close(fd);

  return 0;
}

build-run.sh:

#!/bin/bash
#
# Builds the daemon and client, installs the privileges each one needs,
# then starts the daemon and asks the client to join its mount namespace.

# Stop at the first failure so a broken build never runs stale binaries.
set -euo pipefail

gcc -o daemon daemon.c -lcap
gcc -o client client.c -lcap

# The daemon is setuid root; the client carries file capabilities instead.
sudo chown root:root ./daemon
sudo chmod u+s ./daemon
sudo setcap cap_sys_admin,cap_sys_chroot+ep ./client

./daemon &
daemon_pid=$!

# The daemon pauses forever; make sure it does not outlive this script.
trap 'kill "$daemon_pid" 2>/dev/null || true' EXIT

# Give the daemon time to exec, drop privileges and unshare its mount
# namespace; otherwise the client may inspect the process too early.
sleep 1

./client "$daemon_pid"

结果报错“Operation not permitted”(不允许的操作)——setns() 调用失败。客户端具有正确的能力(capabilities),并且两个进程处于同一个用户命名空间。问题出在哪里?

尝试使用:

sudo setcap cap_sys_admin,cap_sys_chroot,cap_sys_ptrace=ep ./client

需要 cap_sys_ptrace 能力这一细节,似乎藏在一个内核补丁(kernel patch)的代码注释里:

+ * This syscall gets a copy of a file descriptor from another process
+ * based on the pidfd, and file descriptor number. It requires that
+ * the calling process has the ability to ptrace the process represented
+ * by the pidfd. The process which is having its file descriptor copied
+ * is otherwise unaffected.