subprocess.check_output 超时进程终止被推迟

subprocess.check_output with timeout process termination is deferred

我有以下代码,它应该启动一个作业,并在作业耗时过长时终止该进程。

import random
from datetime import datetime
from subprocess import check_output, STDOUT, TimeoutExpired

# Timeout, in seconds, handed to check_output() for every job.
MAX_WAIT = 5

def custom_task(job=None):
    """Run a sleeping shell job through ``check_output`` and report the outcome.

    The shell command sleeps for the chosen number of seconds and then
    appends a wake-up line to ``task.log`` via ``tee``. ``MAX_WAIT`` is
    passed as the ``check_output`` timeout. One status line is printed when
    the job starts and one when it finishes, is cancelled by the timeout,
    or fails with any other error.

    Args:
        job: Sleep duration in seconds. A falsy value selects a random
            integer between 4 and 10 inclusive.

    Returns:
        None. Everything is reported through ``print``.
    """
    duration = job or random.randint(4,10)

    def report(template):
        # The timestamp is taken at print time, so every status line
        # shows the moment it was emitted.
        print(template.format(job=duration, time=datetime.now()))

    print('\nThis task will sleep {s} sec.'.format(s=duration))
    report('-/- {time} - - Commence new job <{job}>')
    try:
        shell_line = 'sleep {s}; echo "Woke up after {s} sec." | tee -a task.log'.format(s=duration)
        raw_output = check_output(shell_line, shell=True, stderr=STDOUT, timeout=MAX_WAIT)
        text = raw_output.decode()
    except TimeoutExpired:
        report('~/~ {time} - - Job <{job}> has been cancelled')
        return
    except Exception as err:
        report('!/! {time} - - Job <{job}> could not finish because of the error')
        print('{err}'.format(err=err))
        return
    # Success path: the command finished within the timeout.
    report('=/= {time} - - Job <{job}> has been done')
    print(text)

# One job shorter than MAX_WAIT (4 s) and one longer than it (8 s).
custom_task(4)
custom_task(8)

控制台给我以下输出:

This task will sleep 4 sec.
-/- 2016-11-03 01:07:56.037104 - - Commence new job <4>
=/= 2016-11-03 01:08:00.051072 - - Job <4> has been done
Woke up after 4 sec.


This task will sleep 8 sec.
-/- 2016-11-03 01:08:00.051233 - - Commence new job <8>
~/~ 2016-11-03 01:08:08.062563 - - Job <8> has been cancelled

请注意,本应休眠 8 秒的任务在 8 秒之后才解除阻塞,而不是在预期的 MAX_WAIT = 5 秒之后。

但是如果我检查 tasks.log 我会看到:

Woke up after 4 sec.

这意味着只有 4 秒的任务成功完成,这是期望和预期的。因此,我的脚本可以正常工作,但方式却出乎意料和不受欢迎。

有没有办法在超过 MAX_WAIT 超时时间后立即解除阻塞(终止进程)?

这是怎么回事,为什么 python 等到 sleep 结束睡眠?

编辑 - 一个工作示例

from random import randint
from os import killpg
from signal import SIGKILL
from datetime import datetime
from subprocess import Popen, STDOUT, TimeoutExpired, PIPE

# Timeout, in seconds, handed to the first Popen.communicate() call of every job.
MAX_WAIT = 5

def custom_task(job=None):
    """Run a sleeping shell job in its own session and kill it on timeout.

    The shell command sleeps for the chosen number of seconds and then
    appends a wake-up line to ``tasks.log`` via ``tee``. If the command does
    not finish within ``MAX_WAIT`` seconds, the whole process group is
    killed with ``SIGKILL`` and any output produced so far is collected.
    The return code and the captured output are always printed at the end.

    Args:
        job: Sleep duration in seconds. A falsy value selects a random
            integer between 4 and 10 inclusive.

    Returns:
        None. Everything is reported through ``print``.
    """
    sec = job or randint(4,10)
    print('\n# This task will sleep {s} sec.'.format(s=sec))
    print('-/- {time} - - Commence new job <{job}>'.format(
          job=sec, time=datetime.now()))
    cmd = 'sleep {s}; echo "Woke up after {s} sec." | tee -a tasks.log'.format(s=sec)
    # Bound up front so the final print cannot hit an unbound name when
    # communicate() fails before returning any output.
    stdout = ''
    # start_new_session=True puts the shell in a new session, so proc.pid
    # can be used as the process-group id for killpg().
    with Popen(cmd, shell=True,
               stdout=PIPE, stderr=STDOUT,
               close_fds=True,
               universal_newlines=True,
               start_new_session=True) as proc:
        try:
            stdout = proc.communicate(timeout=MAX_WAIT)[0]
        except TimeoutExpired:
            print('~/~ {time} - - Job <{job}> has been cancelled'.format(
                  job=sec, time=datetime.now()))
            killpg(proc.pid, SIGKILL)
            # Second communicate() drains the pipe and reaps the shell.
            stdout = proc.communicate(timeout=1)[0]
        except Exception as err:
            print('!/! {time} - - Job <{job}> could not finish because of the error'.format(
                  job=sec, time=datetime.now()))
            print('{err}'.format(err=err))
            try:
                killpg(proc.pid, SIGKILL)
            except ProcessLookupError:
                # The process group is already gone; nothing left to kill.
                pass
            # Reap the shell so the return code printed below is populated.
            proc.wait()
        else:
            print('=/= {time} - - Job <{job}> has been done'.format(
                  job=sec, time=datetime.now()))
        print('# Return code: {}'.format(proc.returncode))
        print(stdout)

# One job shorter than MAX_WAIT (4 s) and one far longer than it (30 s).
custom_task(4)
custom_task(30)

输出

$ time python3 popen.py 

# This task will sleep 4 sec.
-/- 2016-11-05 15:38:13.833871 - - Commence new job <4>
=/= 2016-11-05 15:38:17.842769 - - Job <4> has been done
# Return code: 0
Woke up after 4 sec.


# This task will sleep 30 sec.
-/- 2016-11-05 15:38:17.842942 - - Commence new job <30>
~/~ 2016-11-05 15:38:22.849511 - - Job <30> has been cancelled
# Return code: -9


real    0m9.095s
user    0m0.087s
sys 0m0.000s

check_output 调用实际上启动了多个进程。因此,当 subprocess 发送 kill 信号时,该信号并不会发送给所有的后代进程,check_output 可能会继续等待其他进程结束。如需解决方案,请查看 Stack Overflow 上的