当我考虑参数和环境时,为什么我从 exec() 得到 E2BIG?
Why am I getting E2BIG from exec() when I'm accounting for the arguments and the environment?
我已经阅读了几页资料(例如 this one),了解如何在 Linux 上正确解释 ARG_MAX。据我所知,每个参数/环境变量占用一个指针的大小,再加上以 nul 结尾的字符串本身的大小(即字符串长度加一)。填充也可能占用一些额外的空间。然而,即使给自己留出了 2K 的额外余量,我在尝试 exec()
长命令行时仍然会遇到 E2BIG。这是什么原因造成的?
我原以为可能是辅助向量(auxiliary vector)造成的,但即使加上它的大小(在我的例子中为 320 字节)也无济于事。我的用量也远低于 MAX_ARG_STRINGS
和 MAX_ARG_STRLEN
(参见 https://unix.stackexchange.com/a/120842/56202)。
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
extern char **environ;
/*
 * Number of bytes one argv/environ entry is counted as: the pointer slot in
 * the array plus, for a non-NULL entry, the string and its terminating NUL.
 * A NULL entry (the array terminator) is counted as a single pointer.
 */
size_t arg_size(const char *arg) {
    const size_t slot = sizeof(const char *);

    if (arg == NULL) {
        return slot;
    }
    return slot + strlen(arg) + 1;
}
// Reproducer: fill argv with copies of one 64-character argument until the
// running byte estimate would reach the adjusted ARG_MAX, then exec "true".
// Returns EXIT_FAILURE only if execvp() returns, i.e. the exec failed.
int main() {
// NOTE(review): sysconf()'s result is stored in a size_t without checking
// for an error return -- confirm it cannot fail for _SC_ARG_MAX here.
size_t arg_max = sysconf(_SC_ARG_MAX);
arg_max -= 2048; // POSIX recommends 2048 bytes of additional headroom
printf("arg_max: %zu\n", arg_max);
size_t size = 320; // For the auxiliary vector
// Add the cost of every inherited environment string and its pointer.
for (char **envp = environ; *envp; ++envp) {
size += arg_size(*envp);
}
// One more pointer for environ's terminating NULL.
size += arg_size(NULL);
// Slot 0 is the program name; all remaining slots start out as NULL.
char *argv[100001] = {"true"};
size += arg_size(argv[0]);
// One more pointer for argv's terminating NULL.
size += arg_size(NULL);
char *arg = "0123456789ABCDEF0123456789ABCDEF"
"0123456789ABCDEF0123456789ABCDEF";
size_t each_size = arg_size(arg);
size_t i;
// Append the same argument while the estimate stays strictly below arg_max.
// i never exceeds 100000, so the argv[i] store after the loop is in bounds.
for (i = 1; i < 100000 && size + each_size < arg_max; ++i) {
argv[i] = arg;
size += each_size;
}
argv[i] = NULL;
printf("size: %zu, argc: %zu\n", size, i);
// Sanity check: the loop condition kept the estimate under the limit.
assert(size < arg_max);
execvp(argv[0], argv);
// Only reached when execvp() failed; report why.
perror("execvp()");
return EXIT_FAILURE;
}
$ gcc e2big.c -o e2big && ./e2big
arg_max: 2095104
size: 2095059, argc: 28640
execvp(): Argument list too long
为了完整起见,这是在
$ uname -a
Linux superluminal 4.13.7-1-ARCH #1 SMP PREEMPT Sat Oct 14 20:13:26 CEST 2017 x86_64 GNU/Linux
char *argv[100000] = {"true"};
....
for (i = 1; i < 100000 && size + each_size < arg_max; ++i) {
argv[i] = arg;
size += each_size;
}
argv[i] = NULL;
欢迎来到 UB(未定义行为)世界。循环结束后,i
为 100000,您不应该访问 argv[100000]
。
纯推测:argv
数组分配在堆栈上,只有编译器知道越界的那个元素是如何被 printf
破坏的。
想通了!首先,除了 argv[]
和 envp[]
之外,exec()
系统调用实现还出于某种原因复制了可执行文件的完整路径,并且这部分也会计入触发 E2BIG 的限制:
retval = copy_strings_kernel(1, &bprm->filename, bprm);
if (retval < 0)
goto out;
bprm->exec = bprm->p;
retval = copy_strings(bprm->envc, envp, bprm);
if (retval < 0)
goto out;
retval = copy_strings(bprm->argc, argv, bprm);
if (retval < 0)
goto out;
https://github.com/torvalds/linux/blob/v4.13/fs/exec.c#L1775
其次,参数字符串一次复制一页,并且仅以整页的粒度与限制进行比较:
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
unsigned long ptr_size, limit;
...
ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
if (ptr_size > ULONG_MAX - size)
goto fail;
size += ptr_size;
因此,我不仅需要计算参数/环境字符串的总长度,还必须在加上指针大小之前将其向上取整到页面大小的倍数。或者,为简单起见,只需确保至少留出一整页的余量。
我已经阅读了几页资料(例如 this one),了解如何在 Linux 上正确解释 ARG_MAX。据我所知,每个参数/环境变量占用一个指针的大小,再加上以 nul 结尾的字符串本身的大小(即字符串长度加一)。填充也可能占用一些额外的空间。然而,即使给自己留出了 2K 的额外余量,我在尝试 exec()
长命令行时仍然会遇到 E2BIG。这是什么原因造成的?
我原以为可能是辅助向量(auxiliary vector)造成的,但即使加上它的大小(在我的例子中为 320 字节)也无济于事。我的用量也远低于 MAX_ARG_STRINGS
和 MAX_ARG_STRLEN
(参见 https://unix.stackexchange.com/a/120842/56202)。
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
extern char **environ;
/*
 * Number of bytes one argv/environ entry is counted as: the pointer slot in
 * the array plus, for a non-NULL entry, the string and its terminating NUL.
 * A NULL entry (the array terminator) is counted as a single pointer.
 */
size_t arg_size(const char *arg) {
    const size_t slot = sizeof(const char *);

    if (arg == NULL) {
        return slot;
    }
    return slot + strlen(arg) + 1;
}
// Reproducer: fill argv with copies of one 64-character argument until the
// running byte estimate would reach the adjusted ARG_MAX, then exec "true".
// Returns EXIT_FAILURE only if execvp() returns, i.e. the exec failed.
int main() {
// NOTE(review): sysconf()'s result is stored in a size_t without checking
// for an error return -- confirm it cannot fail for _SC_ARG_MAX here.
size_t arg_max = sysconf(_SC_ARG_MAX);
arg_max -= 2048; // POSIX recommends 2048 bytes of additional headroom
printf("arg_max: %zu\n", arg_max);
size_t size = 320; // For the auxiliary vector
// Add the cost of every inherited environment string and its pointer.
for (char **envp = environ; *envp; ++envp) {
size += arg_size(*envp);
}
// One more pointer for environ's terminating NULL.
size += arg_size(NULL);
// Slot 0 is the program name; all remaining slots start out as NULL.
char *argv[100001] = {"true"};
size += arg_size(argv[0]);
// One more pointer for argv's terminating NULL.
size += arg_size(NULL);
char *arg = "0123456789ABCDEF0123456789ABCDEF"
"0123456789ABCDEF0123456789ABCDEF";
size_t each_size = arg_size(arg);
size_t i;
// Append the same argument while the estimate stays strictly below arg_max.
// i never exceeds 100000, so the argv[i] store after the loop is in bounds.
for (i = 1; i < 100000 && size + each_size < arg_max; ++i) {
argv[i] = arg;
size += each_size;
}
argv[i] = NULL;
printf("size: %zu, argc: %zu\n", size, i);
// Sanity check: the loop condition kept the estimate under the limit.
assert(size < arg_max);
execvp(argv[0], argv);
// Only reached when execvp() failed; report why.
perror("execvp()");
return EXIT_FAILURE;
}
$ gcc e2big.c -o e2big && ./e2big
arg_max: 2095104
size: 2095059, argc: 28640
execvp(): Argument list too long
为了完整起见,这是在
$ uname -a
Linux superluminal 4.13.7-1-ARCH #1 SMP PREEMPT Sat Oct 14 20:13:26 CEST 2017 x86_64 GNU/Linux
char *argv[100000] = {"true"};
....
for (i = 1; i < 100000 && size + each_size < arg_max; ++i) {
argv[i] = arg;
size += each_size;
}
argv[i] = NULL;
欢迎来到 UB(未定义行为)世界。循环结束后,i
为 100000,您不应该访问 argv[100000]
。
纯推测:argv
数组分配在堆栈上,只有编译器知道越界的那个元素是如何被 printf
破坏的。
想通了!首先,除了 argv[]
和 envp[]
之外,exec()
系统调用实现还出于某种原因复制了可执行文件的完整路径,并且这部分也会计入触发 E2BIG 的限制:
retval = copy_strings_kernel(1, &bprm->filename, bprm); if (retval < 0) goto out; bprm->exec = bprm->p; retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out;
https://github.com/torvalds/linux/blob/v4.13/fs/exec.c#L1775
其次,参数字符串一次复制一页,并且仅以整页的粒度与限制进行比较:
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; unsigned long ptr_size, limit; ... ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); if (ptr_size > ULONG_MAX - size) goto fail; size += ptr_size;
因此,我不仅需要计算参数/环境字符串的总长度,还必须在加上指针大小之前将其向上取整到页面大小的倍数。或者,为简单起见,只需确保至少留出一整页的余量。