c fork() 和 kill() 同时不起作用?
c fork() and kill() at the same time not working?
主程序:启动一定数量的child进程,然后立即发送SIGINT
。
/*
 * Parent test driver: forks CHILDS children, execs the helper binary in
 * each of them, sends SIGINT to every child right after the last fork(),
 * and finally reaps all children.
 *
 * Nothing synchronizes fork() with kill(), so a child can receive SIGINT
 * before it has installed its handler, or even before execv() has run.
 */
int main(void)
{
    pid_t childs[CHILDS];
    char *execv_argv[3];
    int n = CHILDS;

    execv_argv[0] = "./debugging_procs/wait_time_at_interrupt";
    execv_argv[1] = "2";
    execv_argv[2] = NULL;

    for (int i = 0; i < n; i++)
    {
        childs[i] = fork();
        if (childs[i] == -1)
        {
            /*
             * Never leave -1 in the array: kill(-1, sig) would signal
             * every process this process is allowed to signal.
             */
            perror("fork");
            n = i;
            break;
        }
        if (childs[i] == 0)
        {
            execv(execv_argv[0], execv_argv);
            /* execv() only returns when it failed. */
            perror("execv");
            _exit(1);
        }
    }

    // sleep(1);
    for (int i = 0; i < n; i++)
    {
        /* errno is only meaningful after a call that reported failure. */
        if (kill(childs[i], SIGINT) == -1)
            perror("kill");
    }

    // Wait for all children.
    while (wait(NULL) > 0)
        ;
    return 0;
}
分叉程序:等待任何信号,如果发送 SIGINT,则打开某个文件并将 SIGINT 和当前 pid 写入其中并等待指定的秒数(在这种情况下,我从主程序发送 2 ).
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
/*
 * SIGINT handler: appends the line "SIGINT <pid>\n" to ./aux/log1.
 *
 * The file is not created here; if it cannot be opened the handler returns
 * without logging.  The pid is formatted by hand because sprintf() is not
 * on the POSIX list of async-signal-safe functions, while open(), write(),
 * close() and getpid() are.
 */
void sigint_handler(int signum)
{
    static const char prefix[] = "SIGINT ";
    char buf[sizeof prefix + 32];
    char digits[32];
    size_t len = 0;
    size_t ndigits = 0;
    long pid = (long)getpid();
    const char *out;
    int fd;

    (void)signum;

    fd = open("./aux/log1", O_WRONLY | O_APPEND);
    if (fd == -1)
        return; /* no log file: do not write to / close an invalid fd */

    for (size_t i = 0; prefix[i] != '\0'; i++)
        buf[len++] = prefix[i];

    /* Collect the decimal digits least-significant first, then reverse. */
    do
    {
        digits[ndigits++] = (char)('0' + pid % 10);
        pid /= 10;
    } while (pid > 0);
    while (ndigits > 0)
        buf[len++] = digits[--ndigits];
    buf[len++] = '\n';

    /* write() may transfer fewer bytes than requested; continue until done. */
    out = buf;
    while (len > 0)
    {
        ssize_t written = write(fd, out, len);

        if (written <= 0)
            break;
        out += written;
        len -= (size_t)written;
    }
    close(fd);
}
int main(int argc, char **argv)
{
int wait_time;
wait_time = (argv[1]) ? atoi(argv[1]) : 5;
signal(SIGINT, &sigint_handler);
// Wait for any signal.
pause();
sleep(wait_time);
return 0;
}
问题是,children 应该写入的日志文件没有 n
行,这意味着并非所有 children 都写入了它。有时没有人写任何东西,主程序根本没有 wait
(意味着在这种情况下 sleep()
没有被调用)。
但是如果我在主程序中取消注释 sleep(1)
,一切都会如我所料。
我怀疑 child 进程没有足够的时间来收听 SIGINT
。
我正在处理的程序是一个任务控件,当我 运行 一个命令时:
restart my_program; restart my_program
我的行为不稳定。当我调用重新启动时,发送一个 SIGINT
,然后调用一个新的 fork()
,然后发送另一个 SIGINT
,就像上面的示例一样。
如何确保所有 children 都能在没有 sleep(1)
行的情况下解析 SIGINT
?我正在测试我的程序是否可以处理在发送 SIGINT 后不会立即退出的程序。
例如,如果我在 child 程序的顶部添加 printf("child process started\n");
,它不会被打印并且主程序不会等待任何东西,除非我 sleep
一秒钟。即使只有 1 个 child 进程也会发生这种情况。
尝试在 for 循环中使用 waitpid() 命令。这样下一个 child 只会在第一个 child 完成后写入
一切正常。你的一些 child 进程在设置信号处理程序之前,甚至在它们开始执行 child 二进制文件之前就被信号杀死了。
在您的 parent 进程中,您可以检查每个进程的标识和退出状态,而不是仅仅 wait()
直到没有更多的 child 进程。将 while (wait(NULL) > 0);
替换为
{
    int status;

    /* Reap children one at a time and report how each of them ended. */
    for (pid_t p = wait(&status); p > 0; p = wait(&status)) {
        const long id = (long)p;

        if (WIFEXITED(status)) {
            printf("Child %ld exit status was %d.\n", id, WEXITSTATUS(status));
        } else if (WIFSIGNALED(status)) {
            printf("Child %ld was killed by signal %d.\n", id, WTERMSIG(status));
        } else {
            printf("Child %ld was lost.\n", id);
        }
        /* Flush after every report so the line is emitted immediately. */
        fflush(stdout);
    }
}
你会看到 "missing" child 进程被信号终止了。这意味着 child 进程在准备好捕捉信号之前就被终止了。
我自己编写了示例程序对,具有完整的错误检查功能。我决定使用 sigprocmask()
和 sigwaitinfo()
而不是信号处理程序,只是为了展示另一种方法来做同样的事情(并且不限于信号处理程序中的 async-signal 安全函数).
parent.c:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/*
 * Returns a short name for a signal number: "INT", "HUP" or "TERM" for
 * SIGINT, SIGHUP and SIGTERM, otherwise the number formatted in decimal.
 *
 * The decimal form is stored in a function-local static buffer, so it is
 * overwritten by the next call that takes the numeric path.
 */
const char *signal_name(const int signum)
{
    static char numeric[32];

    if (signum == SIGINT)
        return "INT";
    if (signum == SIGHUP)
        return "HUP";
    if (signum == SIGTERM)
        return "TERM";

    snprintf(numeric, sizeof numeric, "%d", signum);
    return numeric;
}
/*
 * qsort() comparison callback that orders pid_t values ascending.
 * Returns -1, 0 or +1; it compares instead of subtracting the values.
 */
static int compare_pids(const void *p1, const void *p2)
{
    const pid_t *lhs = p1;
    const pid_t *rhs = p2;

    if (*lhs < *rhs)
        return -1;
    if (*lhs > *rhs)
        return +1;
    return 0;
}
/*
 * Usage: parent COUNT PATH-TO-BINARY [ ARGS ... ]
 *
 * Forks COUNT children that each exec PATH-TO-BINARY, sends every child
 * SIGINT right after the last fork(), then reaps the children and reports
 * how each one ended.  Finally it checks that the set of reaped pids is
 * the same as the set of forked pids.
 *
 * Returns EXIT_SUCCESS once all children have been reaped; EXIT_FAILURE on
 * a usage error, an allocation failure, failure to fork the first child,
 * or an unexpected wait() error.
 */
int main(int argc, char *argv[])
{
    size_t count, r, i;
    int status;
    pid_t *child, *reaped, p;
    char dummy;

    if (argc < 3 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, " %s COUNT PATH-TO-BINARY [ ARGS ... ]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program will fork COUNT child processes,\n");
        fprintf(stderr, "each child process executing PATH-TO-BINARY.\n");
        fprintf(stderr, "Immediately after all child processes have been forked,\n");
        fprintf(stderr, "they are sent a SIGINT signal.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    /* %zu accepts a leading '-' and wraps the value, so reject it up front. */
    if (strchr(argv[1], '-') ||
        sscanf(argv[1], " %zu %c", &count, &dummy) != 1 || count < 1) {
        fprintf(stderr, "%s: Invalid count.\n", argv[1]);
        return EXIT_FAILURE;
    }

    /* calloc() fails cleanly if count * size overflows; malloc(count * size) would not. */
    child = calloc(count, sizeof child[0]);
    reaped = calloc(count, sizeof reaped[0]);
    if (!child || !reaped) {
        fprintf(stderr, "%s: Count is too large; out of memory.\n", argv[1]);
        free(child);
        free(reaped);
        return EXIT_FAILURE;
    }

    for (i = 0; i < count; i++) {
        p = fork();
        if (p == -1) {
            if (i == 0) {
                fprintf(stderr, "Cannot fork child processes: %s.\n", strerror(errno));
                free(child);
                free(reaped);
                return EXIT_FAILURE;
            }
            /* Continue the test with the children that were forked so far. */
            fprintf(stderr, "Cannot fork child %zu: %s.\n", i + 1, strerror(errno));
            count = i;
            break;
        }
        if (!p) {
            /* Child process: execvp() only returns when it failed. */
            execvp(argv[2], argv + 2);
            {
                const char *errmsg = strerror(errno);
                fprintf(stderr, "Child process %ld: Cannot execute %s: %s.\n",
                        (long)getpid(), argv[2], errmsg);
            }
            /* _exit(): skip atexit handlers and stdio flushing inherited from the parent. */
            _exit(EXIT_FAILURE);
        }
        /* Parent process. */
        child[i] = p;
    }

    /* Send all children the INT signal. */
    for (i = 0; i < count; i++) {
        if (kill(child[i], SIGINT) == -1)
            fprintf(stderr, "Cannot send INT to child process %ld: %s.\n",
                    (long)child[i], strerror(errno));
    }

    /* Reap and report each child. */
    r = 0;
    while (1) {
        p = wait(&status);
        if (p == -1) {
            if (errno == EINTR)
                continue;       /* interrupted, not finished: retry */
            if (errno == ECHILD)
                break;          /* no children left */
            fprintf(stderr, "Error waiting for child processes: %s.\n", strerror(errno));
            free(child);
            free(reaped);
            return EXIT_FAILURE;
        }

        if (r < count)
            reaped[r++] = p;
        else
            fprintf(stderr, "Reaped an extra child process!\n");

        if (WIFEXITED(status)) {
            switch (WEXITSTATUS(status)) {
            case EXIT_SUCCESS:
                printf("Parent: Reaped child process %ld: EXIT_SUCCESS.\n", (long)p);
                break;
            case EXIT_FAILURE:
                printf("Parent: Reaped child process %ld: EXIT_FAILURE.\n", (long)p);
                break;
            default:
                printf("Parent: Reaped child process %ld: Exit status %d.\n", (long)p, WEXITSTATUS(status));
                break;
            }
        } else if (WIFSIGNALED(status)) {
            printf("Parent: Reaped child process %ld: Terminated by %s.\n", (long)p, signal_name(WTERMSIG(status)));
        } else {
            printf("Parent: Reaped child process %ld: Lost.\n", (long)p);
        }
        fflush(stdout);
    }

    if (r == count) {
        /* Sort both pid arrays so they can be compared element by element. */
        qsort(child, count, sizeof child[0], compare_pids);
        qsort(reaped, count, sizeof reaped[0], compare_pids);
        for (i = 0; i < count; i++)
            if (child[i] != reaped[i])
                break;
        if (i == count)
            printf("Parent: All %zu child processes were reaped successfully.\n", count);
    }

    free(child);
    free(reaped);
    return EXIT_SUCCESS;
}
child.c:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/*
 * Returns a short name for a signal number: "INT", "HUP" or "TERM" for
 * SIGINT, SIGHUP and SIGTERM, otherwise the number formatted in decimal.
 *
 * The decimal form is stored in a function-local static buffer, so it is
 * overwritten by the next call that takes the numeric path.
 */
const char *signal_name(const int signum)
{
    static char numeric[32];

    if (signum == SIGINT)
        return "INT";
    if (signum == SIGHUP)
        return "HUP";
    if (signum == SIGTERM)
        return "TERM";

    snprintf(numeric, sizeof numeric, "%d", signum);
    return numeric;
}
/*
 * Announces itself, blocks SIGINT, SIGHUP and SIGTERM, waits synchronously
 * for one of them with sigwaitinfo(), and reports which signal arrived and
 * which process sent it.
 *
 * Returns EXIT_SUCCESS after a signal was received, EXIT_FAILURE if the
 * signals could not be blocked or waited for.
 */
int main(void)
{
    const long mypid = (long)getpid();
    sigset_t set;
    siginfo_t info;
    int result;

    printf("Child: Child process %ld started!\n", mypid);
    fflush(stdout);

    sigemptyset(&set);
    sigaddset(&set, SIGINT);
    sigaddset(&set, SIGHUP);
    sigaddset(&set, SIGTERM);

    /*
     * Until the signals are blocked their default disposition applies, so
     * a signal arriving before this point terminates the process.
     */
    if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
        printf("Child: Child process %ld failed: %s.\n", mypid, strerror(errno));
        return EXIT_FAILURE;
    }

    /* EINTR means the wait was interrupted, not that it failed: retry. */
    do {
        result = sigwaitinfo(&set, &info);
    } while (result == -1 && errno == EINTR);

    if (result == -1) {
        printf("Child: Child process %ld failed: %s.\n", mypid, strerror(errno));
        return EXIT_FAILURE;
    }

    if (info.si_pid == 0) {
        printf("Child: Child process %ld terminated by signal %s via terminal.\n", mypid, signal_name(result));
    } else if (info.si_pid == getppid()) {
        printf("Child: Child process %ld terminated by signal %s sent by the parent process %ld.\n",
               mypid, signal_name(result), (long)info.si_pid);
    } else {
        printf("Child: Child process %ld terminated by signal %s sent by process %ld.\n",
               mypid, signal_name(result), (long)info.si_pid);
    }
    return EXIT_SUCCESS;
}
例如,使用以下命令编译这两个程序:
gcc -Wall -O2 parent.c -o parent
gcc -Wall -O2 child.c -o child
然后例如这样运行它们:
./parent 100 ./child
其中 100
是要 fork 的 child 进程的数量,每个 child 进程都运行 ./child
。
错误输出到标准错误。parent 写到标准输出的每一行都以 Parent:
开头,任何 child 写到标准输出的每一行都以 Child:
开头。
在我的机器上,输出的最后一行始终是 Parent: All # child processes were reaped successfully.
,这意味着每个由 fork()
创建的 child 进程都已通过 wait()
回收并报告。没有任何进程丢失,fork()
和 kill()
也没有任何问题。
(请注意,如果您指定的 child 进程数超过了允许 fork 的数量,parent 程序不会将其视为错误,而只会使用实际成功 fork 出的那些 child 进程进行测试。)
在我的机器上,fork 并回收 100 个 child 进程对 parent 进程来说工作量足够大,因此每个 child 进程都来得及运行到准备好捕获信号的那部分代码。
另一方面,parent 处理 10 个 child 进程(运行 ./parent 10 ./child
)的速度非常快,以至于每个 child 进程都在准备好处理信号之前就被 INT 信号杀死了。
下面是运行 ./parent 20 ./child
时一个非常典型的输出:
Child: Child process 19982 started!
Child: Child process 19983 started!
Child: Child process 19984 started!
Child: Child process 19982 terminated by signal INT sent by the parent process 19981.
Child: Child process 19992 started!
Child: Child process 19983 terminated by signal INT sent by the parent process 19981.
Child: Child process 19984 terminated by signal INT sent by the parent process 19981.
Parent: Reaped child process 19982: EXIT_SUCCESS.
Parent: Reaped child process 19985: Terminated by INT.
Parent: Reaped child process 19986: Terminated by INT.
Parent: Reaped child process 19984: EXIT_SUCCESS.
Parent: Reaped child process 19987: Terminated by INT.
Parent: Reaped child process 19988: Terminated by INT.
Parent: Reaped child process 19989: Terminated by INT.
Parent: Reaped child process 19990: Terminated by INT.
Parent: Reaped child process 19991: Terminated by INT.
Parent: Reaped child process 19992: Terminated by INT.
Parent: Reaped child process 19993: Terminated by INT.
Parent: Reaped child process 19994: Terminated by INT.
Parent: Reaped child process 19995: Terminated by INT.
Parent: Reaped child process 19996: Terminated by INT.
Parent: Reaped child process 19983: EXIT_SUCCESS.
Parent: Reaped child process 19997: Terminated by INT.
Parent: Reaped child process 19998: Terminated by INT.
Parent: Reaped child process 19999: Terminated by INT.
Parent: Reaped child process 20000: Terminated by INT.
Parent: Reaped child process 20001: Terminated by INT.
Parent: All 20 child processes were reaped successfully.
在 20 个 child 进程中,有 16 个在执行第一个 printf()
(或 fflush(stdout)
)行之前被 INT 信号杀死。 (我们可以在 execvp()
行之前向 parent.c 添加一个 printf("Child: Child process %ld executing %s\n", (long)getpid(), argv[2]); fflush(stdout);
,以查看是否有任何 child 进程在执行之前被杀死。)
在剩余的四个 child 进程(19982、19983、19984 和 19992)中,有一个 (19992) 在第一个 printf()
或 fflush()
之后、但在它来得及运行 sigprocmask()
(该调用阻塞信号,使 child 做好捕获信号的准备)之前就被终止了。
只有剩下的三个 child 进程(19982、19983 和 19984)捕获了 parent 进程发送的 INT 信号。
如您所见,只需添加完整的错误检查,并添加足够的输出(和 fflush(stdout);
在有用的地方,因为默认情况下标准输出是缓冲的),让您 运行 几个测试用例,并对正在发生的事情构建一个更好的整体图景。
The program I'm working on is a task control and when I run a command like: restart my_program; restart my_program I get an unstable behaviour. When I call restart, a SIGINT is sent, then a new fork() is called then another SIGINT is sent, just like the example above.
在那种情况下,您是在新 fork 出的进程准备就绪之前发送信号的,因此最终发生什么由该信号的默认处置方式(default disposition,对 INT 而言是终止进程)决定。
这个潜在问题的解决方案各不相同。请注意,它是许多 init system 问题的核心。如果child(这里是my_program
)co-operates很容易解决,但在所有其他情况下都很难。
一种简单的配合方式是:child 一旦准备就绪,就向其 parent 进程发送一个信号。为了避免杀死尚未准备好接收此类通知的 parent 进程,可以使用默认被忽略的信号(例如 SIGWINCH
)。
睡眠一段时间的选项,以便新的 child 进程有足够的时间准备好采取行动,这是一种常见但非常不可靠的缓解此问题的方法。 (具体而言,所需的持续时间取决于 child 进程优先级和机器的总体负载。)
主程序:启动一定数量的child进程,然后立即发送SIGINT
。
/*
 * Parent test driver: forks CHILDS children, execs the helper binary in
 * each of them, sends SIGINT to every child right after the last fork(),
 * and finally reaps all children.
 *
 * Nothing synchronizes fork() with kill(), so a child can receive SIGINT
 * before it has installed its handler, or even before execv() has run.
 */
int main(void)
{
    pid_t childs[CHILDS];
    char *execv_argv[3];
    int n = CHILDS;

    execv_argv[0] = "./debugging_procs/wait_time_at_interrupt";
    execv_argv[1] = "2";
    execv_argv[2] = NULL;

    for (int i = 0; i < n; i++)
    {
        childs[i] = fork();
        if (childs[i] == -1)
        {
            /*
             * Never leave -1 in the array: kill(-1, sig) would signal
             * every process this process is allowed to signal.
             */
            perror("fork");
            n = i;
            break;
        }
        if (childs[i] == 0)
        {
            execv(execv_argv[0], execv_argv);
            /* execv() only returns when it failed. */
            perror("execv");
            _exit(1);
        }
    }

    // sleep(1);
    for (int i = 0; i < n; i++)
    {
        /* errno is only meaningful after a call that reported failure. */
        if (kill(childs[i], SIGINT) == -1)
            perror("kill");
    }

    // Wait for all children.
    while (wait(NULL) > 0)
        ;
    return 0;
}
分叉程序:等待任何信号,如果发送 SIGINT,则打开某个文件并将 SIGINT 和当前 pid 写入其中并等待指定的秒数(在这种情况下,我从主程序发送 2 ).
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
/*
 * SIGINT handler: appends the line "SIGINT <pid>\n" to ./aux/log1.
 *
 * The file is not created here; if it cannot be opened the handler returns
 * without logging.  The pid is formatted by hand because sprintf() is not
 * on the POSIX list of async-signal-safe functions, while open(), write(),
 * close() and getpid() are.
 */
void sigint_handler(int signum)
{
    static const char prefix[] = "SIGINT ";
    char buf[sizeof prefix + 32];
    char digits[32];
    size_t len = 0;
    size_t ndigits = 0;
    long pid = (long)getpid();
    const char *out;
    int fd;

    (void)signum;

    fd = open("./aux/log1", O_WRONLY | O_APPEND);
    if (fd == -1)
        return; /* no log file: do not write to / close an invalid fd */

    for (size_t i = 0; prefix[i] != '\0'; i++)
        buf[len++] = prefix[i];

    /* Collect the decimal digits least-significant first, then reverse. */
    do
    {
        digits[ndigits++] = (char)('0' + pid % 10);
        pid /= 10;
    } while (pid > 0);
    while (ndigits > 0)
        buf[len++] = digits[--ndigits];
    buf[len++] = '\n';

    /* write() may transfer fewer bytes than requested; continue until done. */
    out = buf;
    while (len > 0)
    {
        ssize_t written = write(fd, out, len);

        if (written <= 0)
            break;
        out += written;
        len -= (size_t)written;
    }
    close(fd);
}
int main(int argc, char **argv)
{
int wait_time;
wait_time = (argv[1]) ? atoi(argv[1]) : 5;
signal(SIGINT, &sigint_handler);
// Wait for any signal.
pause();
sleep(wait_time);
return 0;
}
问题是,children 应该写入的日志文件没有 n
行,这意味着并非所有 children 都写入了它。有时没有人写任何东西,主程序根本没有 wait
(意味着在这种情况下 sleep()
没有被调用)。
但是如果我在主程序中取消注释 sleep(1)
,一切都会如我所料。
我怀疑 child 进程没有足够的时间来收听 SIGINT
。
我正在处理的程序是一个任务控件,当我 运行 一个命令时:
restart my_program; restart my_program
我的行为不稳定。当我调用重新启动时,发送一个 SIGINT
,然后调用一个新的 fork()
,然后发送另一个 SIGINT
,就像上面的示例一样。
如何确保所有 children 都能在没有 sleep(1)
行的情况下解析 SIGINT
?我正在测试我的程序是否可以处理在发送 SIGINT 后不会立即退出的程序。
例如,如果我在 child 程序的顶部添加 printf("child process started\n");
,它不会被打印并且主程序不会等待任何东西,除非我 sleep
一秒钟。即使只有 1 个 child 进程也会发生这种情况。
尝试在 for 循环中使用 waitpid() 命令。这样下一个 child 只会在第一个 child 完成后写入
一切正常。你的一些 child 进程在设置信号处理程序之前,甚至在它们开始执行 child 二进制文件之前就被信号杀死了。
在您的 parent 进程中,您可以检查每个进程的标识和退出状态,而不是仅仅 wait()
直到没有更多的 child 进程。将 while (wait(NULL) > 0);
替换为
{
    int status;

    /* Reap children one at a time and report how each of them ended. */
    for (pid_t p = wait(&status); p > 0; p = wait(&status)) {
        const long id = (long)p;

        if (WIFEXITED(status)) {
            printf("Child %ld exit status was %d.\n", id, WEXITSTATUS(status));
        } else if (WIFSIGNALED(status)) {
            printf("Child %ld was killed by signal %d.\n", id, WTERMSIG(status));
        } else {
            printf("Child %ld was lost.\n", id);
        }
        /* Flush after every report so the line is emitted immediately. */
        fflush(stdout);
    }
}
你会看到 "missing" child 进程被信号终止了。这意味着 child 进程在准备好捕捉信号之前就被终止了。
我自己编写了示例程序对,具有完整的错误检查功能。我决定使用 sigprocmask()
和 sigwaitinfo()
而不是信号处理程序,只是为了展示另一种方法来做同样的事情(并且不限于信号处理程序中的 async-signal 安全函数).
parent.c:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/*
 * Returns a short name for a signal number: "INT", "HUP" or "TERM" for
 * SIGINT, SIGHUP and SIGTERM, otherwise the number formatted in decimal.
 *
 * The decimal form is stored in a function-local static buffer, so it is
 * overwritten by the next call that takes the numeric path.
 */
const char *signal_name(const int signum)
{
    static char numeric[32];

    if (signum == SIGINT)
        return "INT";
    if (signum == SIGHUP)
        return "HUP";
    if (signum == SIGTERM)
        return "TERM";

    snprintf(numeric, sizeof numeric, "%d", signum);
    return numeric;
}
/*
 * qsort() comparison callback that orders pid_t values ascending.
 * Returns -1, 0 or +1; it compares instead of subtracting the values.
 */
static int compare_pids(const void *p1, const void *p2)
{
    const pid_t *lhs = p1;
    const pid_t *rhs = p2;

    if (*lhs < *rhs)
        return -1;
    if (*lhs > *rhs)
        return +1;
    return 0;
}
/*
 * Usage: parent COUNT PATH-TO-BINARY [ ARGS ... ]
 *
 * Forks COUNT children that each exec PATH-TO-BINARY, sends every child
 * SIGINT right after the last fork(), then reaps the children and reports
 * how each one ended.  Finally it checks that the set of reaped pids is
 * the same as the set of forked pids.
 *
 * Returns EXIT_SUCCESS once all children have been reaped; EXIT_FAILURE on
 * a usage error, an allocation failure, failure to fork the first child,
 * or an unexpected wait() error.
 */
int main(int argc, char *argv[])
{
    size_t count, r, i;
    int status;
    pid_t *child, *reaped, p;
    char dummy;

    if (argc < 3 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, " %s COUNT PATH-TO-BINARY [ ARGS ... ]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program will fork COUNT child processes,\n");
        fprintf(stderr, "each child process executing PATH-TO-BINARY.\n");
        fprintf(stderr, "Immediately after all child processes have been forked,\n");
        fprintf(stderr, "they are sent a SIGINT signal.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    /* %zu accepts a leading '-' and wraps the value, so reject it up front. */
    if (strchr(argv[1], '-') ||
        sscanf(argv[1], " %zu %c", &count, &dummy) != 1 || count < 1) {
        fprintf(stderr, "%s: Invalid count.\n", argv[1]);
        return EXIT_FAILURE;
    }

    /* calloc() fails cleanly if count * size overflows; malloc(count * size) would not. */
    child = calloc(count, sizeof child[0]);
    reaped = calloc(count, sizeof reaped[0]);
    if (!child || !reaped) {
        fprintf(stderr, "%s: Count is too large; out of memory.\n", argv[1]);
        free(child);
        free(reaped);
        return EXIT_FAILURE;
    }

    for (i = 0; i < count; i++) {
        p = fork();
        if (p == -1) {
            if (i == 0) {
                fprintf(stderr, "Cannot fork child processes: %s.\n", strerror(errno));
                free(child);
                free(reaped);
                return EXIT_FAILURE;
            }
            /* Continue the test with the children that were forked so far. */
            fprintf(stderr, "Cannot fork child %zu: %s.\n", i + 1, strerror(errno));
            count = i;
            break;
        }
        if (!p) {
            /* Child process: execvp() only returns when it failed. */
            execvp(argv[2], argv + 2);
            {
                const char *errmsg = strerror(errno);
                fprintf(stderr, "Child process %ld: Cannot execute %s: %s.\n",
                        (long)getpid(), argv[2], errmsg);
            }
            /* _exit(): skip atexit handlers and stdio flushing inherited from the parent. */
            _exit(EXIT_FAILURE);
        }
        /* Parent process. */
        child[i] = p;
    }

    /* Send all children the INT signal. */
    for (i = 0; i < count; i++) {
        if (kill(child[i], SIGINT) == -1)
            fprintf(stderr, "Cannot send INT to child process %ld: %s.\n",
                    (long)child[i], strerror(errno));
    }

    /* Reap and report each child. */
    r = 0;
    while (1) {
        p = wait(&status);
        if (p == -1) {
            if (errno == EINTR)
                continue;       /* interrupted, not finished: retry */
            if (errno == ECHILD)
                break;          /* no children left */
            fprintf(stderr, "Error waiting for child processes: %s.\n", strerror(errno));
            free(child);
            free(reaped);
            return EXIT_FAILURE;
        }

        if (r < count)
            reaped[r++] = p;
        else
            fprintf(stderr, "Reaped an extra child process!\n");

        if (WIFEXITED(status)) {
            switch (WEXITSTATUS(status)) {
            case EXIT_SUCCESS:
                printf("Parent: Reaped child process %ld: EXIT_SUCCESS.\n", (long)p);
                break;
            case EXIT_FAILURE:
                printf("Parent: Reaped child process %ld: EXIT_FAILURE.\n", (long)p);
                break;
            default:
                printf("Parent: Reaped child process %ld: Exit status %d.\n", (long)p, WEXITSTATUS(status));
                break;
            }
        } else if (WIFSIGNALED(status)) {
            printf("Parent: Reaped child process %ld: Terminated by %s.\n", (long)p, signal_name(WTERMSIG(status)));
        } else {
            printf("Parent: Reaped child process %ld: Lost.\n", (long)p);
        }
        fflush(stdout);
    }

    if (r == count) {
        /* Sort both pid arrays so they can be compared element by element. */
        qsort(child, count, sizeof child[0], compare_pids);
        qsort(reaped, count, sizeof reaped[0], compare_pids);
        for (i = 0; i < count; i++)
            if (child[i] != reaped[i])
                break;
        if (i == count)
            printf("Parent: All %zu child processes were reaped successfully.\n", count);
    }

    free(child);
    free(reaped);
    return EXIT_SUCCESS;
}
child.c:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/*
 * Returns a short name for a signal number: "INT", "HUP" or "TERM" for
 * SIGINT, SIGHUP and SIGTERM, otherwise the number formatted in decimal.
 *
 * The decimal form is stored in a function-local static buffer, so it is
 * overwritten by the next call that takes the numeric path.
 */
const char *signal_name(const int signum)
{
    static char numeric[32];

    if (signum == SIGINT)
        return "INT";
    if (signum == SIGHUP)
        return "HUP";
    if (signum == SIGTERM)
        return "TERM";

    snprintf(numeric, sizeof numeric, "%d", signum);
    return numeric;
}
/*
 * Announces itself, blocks SIGINT, SIGHUP and SIGTERM, waits synchronously
 * for one of them with sigwaitinfo(), and reports which signal arrived and
 * which process sent it.
 *
 * Returns EXIT_SUCCESS after a signal was received, EXIT_FAILURE if the
 * signals could not be blocked or waited for.
 */
int main(void)
{
    const long mypid = (long)getpid();
    sigset_t set;
    siginfo_t info;
    int result;

    printf("Child: Child process %ld started!\n", mypid);
    fflush(stdout);

    sigemptyset(&set);
    sigaddset(&set, SIGINT);
    sigaddset(&set, SIGHUP);
    sigaddset(&set, SIGTERM);

    /*
     * Until the signals are blocked their default disposition applies, so
     * a signal arriving before this point terminates the process.
     */
    if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
        printf("Child: Child process %ld failed: %s.\n", mypid, strerror(errno));
        return EXIT_FAILURE;
    }

    /* EINTR means the wait was interrupted, not that it failed: retry. */
    do {
        result = sigwaitinfo(&set, &info);
    } while (result == -1 && errno == EINTR);

    if (result == -1) {
        printf("Child: Child process %ld failed: %s.\n", mypid, strerror(errno));
        return EXIT_FAILURE;
    }

    if (info.si_pid == 0) {
        printf("Child: Child process %ld terminated by signal %s via terminal.\n", mypid, signal_name(result));
    } else if (info.si_pid == getppid()) {
        printf("Child: Child process %ld terminated by signal %s sent by the parent process %ld.\n",
               mypid, signal_name(result), (long)info.si_pid);
    } else {
        printf("Child: Child process %ld terminated by signal %s sent by process %ld.\n",
               mypid, signal_name(result), (long)info.si_pid);
    }
    return EXIT_SUCCESS;
}
例如,使用以下命令编译这两个程序:
gcc -Wall -O2 parent.c -o parent
gcc -Wall -O2 child.c -o child
然后例如这样运行它们:
./parent 100 ./child
其中 100
是要 fork 的 child 进程的数量,每个 child 进程都运行 ./child
。
错误输出到标准错误。parent 写到标准输出的每一行都以 Parent:
开头,任何 child 写到标准输出的每一行都以 Child:
开头。
在我的机器上,输出的最后一行始终是 Parent: All # child processes were reaped successfully.
,这意味着每个由 fork()
创建的 child 进程都已通过 wait()
回收并报告。没有任何进程丢失,fork()
和 kill()
也没有任何问题。
(请注意,如果您指定的 child 进程数超过了允许 fork 的数量,parent 程序不会将其视为错误,而只会使用实际成功 fork 出的那些 child 进程进行测试。)
在我的机器上,fork 并回收 100 个 child 进程对 parent 进程来说工作量足够大,因此每个 child 进程都来得及运行到准备好捕获信号的那部分代码。
另一方面,parent 处理 10 个 child 进程(运行 ./parent 10 ./child
)的速度非常快,以至于每个 child 进程都在准备好处理信号之前就被 INT 信号杀死了。
下面是运行 ./parent 20 ./child
时一个非常典型的输出:
Child: Child process 19982 started!
Child: Child process 19983 started!
Child: Child process 19984 started!
Child: Child process 19982 terminated by signal INT sent by the parent process 19981.
Child: Child process 19992 started!
Child: Child process 19983 terminated by signal INT sent by the parent process 19981.
Child: Child process 19984 terminated by signal INT sent by the parent process 19981.
Parent: Reaped child process 19982: EXIT_SUCCESS.
Parent: Reaped child process 19985: Terminated by INT.
Parent: Reaped child process 19986: Terminated by INT.
Parent: Reaped child process 19984: EXIT_SUCCESS.
Parent: Reaped child process 19987: Terminated by INT.
Parent: Reaped child process 19988: Terminated by INT.
Parent: Reaped child process 19989: Terminated by INT.
Parent: Reaped child process 19990: Terminated by INT.
Parent: Reaped child process 19991: Terminated by INT.
Parent: Reaped child process 19992: Terminated by INT.
Parent: Reaped child process 19993: Terminated by INT.
Parent: Reaped child process 19994: Terminated by INT.
Parent: Reaped child process 19995: Terminated by INT.
Parent: Reaped child process 19996: Terminated by INT.
Parent: Reaped child process 19983: EXIT_SUCCESS.
Parent: Reaped child process 19997: Terminated by INT.
Parent: Reaped child process 19998: Terminated by INT.
Parent: Reaped child process 19999: Terminated by INT.
Parent: Reaped child process 20000: Terminated by INT.
Parent: Reaped child process 20001: Terminated by INT.
Parent: All 20 child processes were reaped successfully.
在 20 个 child 进程中,有 16 个在执行第一个 printf()
(或 fflush(stdout)
)行之前被 INT 信号杀死。 (我们可以在 execvp()
行之前向 parent.c 添加一个 printf("Child: Child process %ld executing %s\n", (long)getpid(), argv[2]); fflush(stdout);
,以查看是否有任何 child 进程在执行之前被杀死。)
在剩余的四个 child 进程(19982、19983、19984 和 19992)中,有一个 (19992) 在第一个 printf()
或 fflush()
之后、但在它来得及运行 sigprocmask()
(该调用阻塞信号,使 child 做好捕获信号的准备)之前就被终止了。
只有剩下的三个 child 进程(19982、19983 和 19984)捕获了 parent 进程发送的 INT 信号。
如您所见,只需添加完整的错误检查,并添加足够的输出(和 fflush(stdout);
在有用的地方,因为默认情况下标准输出是缓冲的),让您 运行 几个测试用例,并对正在发生的事情构建一个更好的整体图景。
The program I'm working on is a task control and when I run a command like: restart my_program; restart my_program I get an unstable behaviour. When I call restart, a SIGINT is sent, then a new fork() is called then another SIGINT is sent, just like the example above.
在那种情况下,您是在新 fork 出的进程准备就绪之前发送信号的,因此最终发生什么由该信号的默认处置方式(default disposition,对 INT 而言是终止进程)决定。
这个潜在问题的解决方案各不相同。请注意,它是许多 init system 问题的核心。如果child(这里是my_program
)co-operates很容易解决,但在所有其他情况下都很难。
一种简单的配合方式是:child 一旦准备就绪,就向其 parent 进程发送一个信号。为了避免杀死尚未准备好接收此类通知的 parent 进程,可以使用默认被忽略的信号(例如 SIGWINCH
)。
睡眠一段时间的选项,以便新的 child 进程有足够的时间准备好采取行动,这是一种常见但非常不可靠的缓解此问题的方法。 (具体而言,所需的持续时间取决于 child 进程优先级和机器的总体负载。)