exec.Command with Credential in a new user namespace gets error: "operation not permitted"
exec.Command with Credential in a new user namespace gets error: "operation not permitted"
我想使用 Linux 命名空间实现一个简单的沙箱,并在其中执行命令。
为了防止命令写入磁盘,我使用 Credential: &syscall.Credential{Uid: uint32(1), Gid: uint32(1)} 以另一个用户的身份执行命令。
但是,我得到了这个错误:"fork/exec /Main: operation not permitted"。
即使我将代码改为 Credential: &syscall.Credential{Uid: uint32(0), Gid: uint32(0)},也会出现同样的错误。
container.go 如下:
// +build linux
// +build go1.12
package main
import (
"flag"
"fmt"
uuid "github.com/satori/go.uuid"
"io/ioutil"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"github.com/ZiheLiu/sandbox/sandbox"
"github.com/docker/docker/pkg/reexec"
)
// init wires up the re-exec entry point before main runs.
//
// How the dispatch works:
//  1. reexec.Register stores justiceInit under the key "justiceInit".
//  2. reexec.Init checks whether os.Args[0] is a registered key.
//  3. On the first invocation os.Args[0] is the binary path (e.g.
//     "/path/to/clike_container"), which is not registered, so reexec.Init
//     reports false and main runs normally.
//  4. main re-invokes this binary through reexec.Command("justiceInit", args...).
//  5. On that second invocation os.Args[0] is "justiceInit", which is
//     registered, so justiceInit is invoked; any hooks (such as setting the
//     hostname) that must run before the fork can be placed there.
func init() {
	// Registration must happen on every start, before reexec.Init is consulted.
	reexec.Register("justiceInit", justiceInit)

	// A registered initializer has already done the work; do not fall into main.
	if handled := reexec.Init(); handled {
		os.Exit(0)
	}
}
// justiceInit is the re-exec entry point that runs inside the new namespaces.
//
// It reads the command from os.Args[1] and a timeout from os.Args[2], runs the
// command with the inherited standard streams as uid/gid 1, and reports any
// failure on os.Stderr.
func justiceInit() {
	// Guard the positional arguments: indexing os.Args blindly panics when the
	// entry point is invoked without them.
	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, "justiceInit: expected arguments: <command> <timeout>")
		return
	}
	command := os.Args[1]

	// The timeout is parsed but not enforced in this excerpt, and a malformed
	// value is deliberately ignored as before. The blank assignment keeps the
	// compiler from rejecting the otherwise unused variable.
	timeout, _ := strconv.ParseInt(os.Args[2], 10, 32)
	_ = timeout

	cmd := exec.Command(command)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	// Run the command as another user (uid/gid 1) in its own process group.
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid:    true,
		Credential: &syscall.Credential{Uid: 1, Gid: 1},
	}
	cmd.Env = []string{"PS1=[justice] # "}

	// This is where "fork/exec /Main: operation not permitted" is reported.
	if err := cmd.Run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
// main parses the sandbox flags, resolves the requested user, and re-executes
// this binary as "justiceInit" inside fresh mount, UTS, IPC, PID, network and
// user namespaces.
//
// logs will be printed to os.Stderr
func main() {
	command := flag.String("command", "./Main", "the command needed to be execute in sandbox")
	username := flag.String("username", "root", "the user to execute command")
	flag.Parse()

	target, lookupErr := user.Lookup(*username)
	if lookupErr != nil {
		fmt.Fprintf(os.Stderr, "%s\n", lookupErr.Error())
		os.Exit(0)
	}
	// Conversion errors are ignored here, exactly as before.
	uid, _ := strconv.Atoi(target.Uid)
	gid, _ := strconv.Atoi(target.Gid)

	// Every namespace the child is placed into.
	var nsFlags uintptr = syscall.CLONE_NEWNS |
		syscall.CLONE_NEWUTS |
		syscall.CLONE_NEWIPC |
		syscall.CLONE_NEWPID |
		syscall.CLONE_NEWNET |
		syscall.CLONE_NEWUSER

	// Container ID 0 maps to the invoking user/group; container ID 1 maps to
	// the user/group looked up from the -username flag.
	uidMap := []syscall.SysProcIDMap{
		{ContainerID: 0, HostID: os.Getuid(), Size: 1},
		{ContainerID: 1, HostID: uid, Size: 1},
	}
	gidMap := []syscall.SysProcIDMap{
		{ContainerID: 0, HostID: os.Getgid(), Size: 1},
		{ContainerID: 1, HostID: gid, Size: 1},
	}

	child := reexec.Command("justiceInit", *basedir, *command, *timeout)
	child.Stdin, child.Stdout, child.Stderr = os.Stdin, os.Stdout, os.Stderr
	child.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags:  nsFlags,
		UidMappings: uidMap,
		GidMappings: gidMap,
	}

	if runErr := child.Run(); runErr != nil {
		fmt.Fprintf(os.Stderr, "%s\n", runErr.Error())
	}
	os.Exit(0)
}
当我运行 sudo ./container -command='/Main' -username='nobody' 时,出现错误 "fork/exec /Main: operation not permitted"。
justiceInit 所在的用户命名空间中的用户应该是 root,但却无法使用 Credential 设置 uid 和 gid。
我是 Linux 和命名空间的新手,也许我误解了什么。我应该如何修复此错误?非常感谢!
根据 @Charles Duffy 的建议,我追溯了 cmd.Run() 的源码,发现:
// SysProcAttr (abridged): only the ID-mapping fields of syscall.SysProcAttr
// that matter for this question are quoted; the other fields are omitted.
type SysProcAttr struct {
UidMappings []SysProcIDMap // User ID mappings for user namespaces.
GidMappings []SysProcIDMap // Group ID mappings for user namespaces.
// GidMappingsEnableSetgroups enabling setgroups syscall.
// If false, then setgroups syscall will be disabled for the child process.
// This parameter is no-op if GidMappings == nil. Otherwise for unprivileged
// users this should be set to false for mappings work.
GidMappingsEnableSetgroups bool
}
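因此,如果 GidMappingsEnableSetgroups 保持默认值 false,则子进程 justiceInit 将无权使用 setgroups 系统调用,无论其是否具有 root 特权。
而一旦设置了 Credential,Go 在启动子进程时会先调用 setgroups,再设置 gid 和 uid;setgroups 被禁用后,该调用失败,于是返回 "operation not permitted"。
因此,当我在函数 main 中将 cmd.SysProcAttr.GidMappingsEnableSetgroups 设置为 true 时(如下所示),它就起作用了!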
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.SysProcAttr = &syscall.SysProcAttr{
// ... (the remaining fields are elided in the post)
// Leave the setgroups syscall enabled for the child process; with the
// default (false) it is disabled there.
GidMappingsEnableSetgroups: true,
}
我想使用 Linux 命名空间实现一个简单的沙箱,并在其中执行命令。
为了防止命令写入磁盘,我使用 Credential: &syscall.Credential{Uid: uint32(1), Gid: uint32(1)} 以另一个用户的身份执行命令。
但是,我得到了这个错误:"fork/exec /Main: operation not permitted"。
即使我将代码改为 Credential: &syscall.Credential{Uid: uint32(0), Gid: uint32(0)},也会出现同样的错误。
container.go 如下:
// +build linux
// +build go1.12
package main
import (
"flag"
"fmt"
uuid "github.com/satori/go.uuid"
"io/ioutil"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"github.com/ZiheLiu/sandbox/sandbox"
"github.com/docker/docker/pkg/reexec"
)
// init wires up the re-exec entry point before main runs.
//
// How the dispatch works:
//  1. reexec.Register stores justiceInit under the key "justiceInit".
//  2. reexec.Init checks whether os.Args[0] is a registered key.
//  3. On the first invocation os.Args[0] is the binary path (e.g.
//     "/path/to/clike_container"), which is not registered, so reexec.Init
//     reports false and main runs normally.
//  4. main re-invokes this binary through reexec.Command("justiceInit", args...).
//  5. On that second invocation os.Args[0] is "justiceInit", which is
//     registered, so justiceInit is invoked; any hooks (such as setting the
//     hostname) that must run before the fork can be placed there.
func init() {
	// Registration must happen on every start, before reexec.Init is consulted.
	reexec.Register("justiceInit", justiceInit)

	// A registered initializer has already done the work; do not fall into main.
	if handled := reexec.Init(); handled {
		os.Exit(0)
	}
}
// justiceInit is the re-exec entry point that runs inside the new namespaces.
//
// It reads the command from os.Args[1] and a timeout from os.Args[2], runs the
// command with the inherited standard streams as uid/gid 1, and reports any
// failure on os.Stderr.
func justiceInit() {
	// Guard the positional arguments: indexing os.Args blindly panics when the
	// entry point is invoked without them.
	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, "justiceInit: expected arguments: <command> <timeout>")
		return
	}
	command := os.Args[1]

	// The timeout is parsed but not enforced in this excerpt, and a malformed
	// value is deliberately ignored as before. The blank assignment keeps the
	// compiler from rejecting the otherwise unused variable.
	timeout, _ := strconv.ParseInt(os.Args[2], 10, 32)
	_ = timeout

	cmd := exec.Command(command)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	// Run the command as another user (uid/gid 1) in its own process group.
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid:    true,
		Credential: &syscall.Credential{Uid: 1, Gid: 1},
	}
	cmd.Env = []string{"PS1=[justice] # "}

	// This is where "fork/exec /Main: operation not permitted" is reported.
	if err := cmd.Run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
// main parses the sandbox flags, resolves the requested user, and re-executes
// this binary as "justiceInit" inside fresh mount, UTS, IPC, PID, network and
// user namespaces.
//
// logs will be printed to os.Stderr
func main() {
	command := flag.String("command", "./Main", "the command needed to be execute in sandbox")
	username := flag.String("username", "root", "the user to execute command")
	flag.Parse()

	target, lookupErr := user.Lookup(*username)
	if lookupErr != nil {
		fmt.Fprintf(os.Stderr, "%s\n", lookupErr.Error())
		os.Exit(0)
	}
	// Conversion errors are ignored here, exactly as before.
	uid, _ := strconv.Atoi(target.Uid)
	gid, _ := strconv.Atoi(target.Gid)

	// Every namespace the child is placed into.
	var nsFlags uintptr = syscall.CLONE_NEWNS |
		syscall.CLONE_NEWUTS |
		syscall.CLONE_NEWIPC |
		syscall.CLONE_NEWPID |
		syscall.CLONE_NEWNET |
		syscall.CLONE_NEWUSER

	// Container ID 0 maps to the invoking user/group; container ID 1 maps to
	// the user/group looked up from the -username flag.
	uidMap := []syscall.SysProcIDMap{
		{ContainerID: 0, HostID: os.Getuid(), Size: 1},
		{ContainerID: 1, HostID: uid, Size: 1},
	}
	gidMap := []syscall.SysProcIDMap{
		{ContainerID: 0, HostID: os.Getgid(), Size: 1},
		{ContainerID: 1, HostID: gid, Size: 1},
	}

	child := reexec.Command("justiceInit", *basedir, *command, *timeout)
	child.Stdin, child.Stdout, child.Stderr = os.Stdin, os.Stdout, os.Stderr
	child.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags:  nsFlags,
		UidMappings: uidMap,
		GidMappings: gidMap,
	}

	if runErr := child.Run(); runErr != nil {
		fmt.Fprintf(os.Stderr, "%s\n", runErr.Error())
	}
	os.Exit(0)
}
当我运行 sudo ./container -command='/Main' -username='nobody' 时,出现错误 "fork/exec /Main: operation not permitted"。
justiceInit 所在的用户命名空间中的用户应该是 root,但却无法使用 Credential 设置 uid 和 gid。
我是 Linux 和命名空间的新手,也许我误解了什么。我应该如何修复此错误?非常感谢!
根据 @Charles Duffy 的建议,我追溯了 cmd.Run() 的源码,发现:
// SysProcAttr (abridged): only the ID-mapping fields of syscall.SysProcAttr
// that matter for this question are quoted; the other fields are omitted.
type SysProcAttr struct {
UidMappings []SysProcIDMap // User ID mappings for user namespaces.
GidMappings []SysProcIDMap // Group ID mappings for user namespaces.
// GidMappingsEnableSetgroups enabling setgroups syscall.
// If false, then setgroups syscall will be disabled for the child process.
// This parameter is no-op if GidMappings == nil. Otherwise for unprivileged
// users this should be set to false for mappings work.
GidMappingsEnableSetgroups bool
}
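因此,如果 GidMappingsEnableSetgroups 保持默认值 false,则子进程 justiceInit 将无权使用 setgroups 系统调用,无论其是否具有 root 特权。
而一旦设置了 Credential,Go 在启动子进程时会先调用 setgroups,再设置 gid 和 uid;setgroups 被禁用后,该调用失败,于是返回 "operation not permitted"。
因此,当我在函数 main 中将 cmd.SysProcAttr.GidMappingsEnableSetgroups 设置为 true 时(如下所示),它就起作用了!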
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.SysProcAttr = &syscall.SysProcAttr{
// ... (the remaining fields are elided in the post)
// Leave the setgroups syscall enabled for the child process; with the
// default (false) it is disabled there.
GidMappingsEnableSetgroups: true,
}