动态启动的 child 在收到 shutdown 信号时会引发异常

Dynamically started children cause an exception when a shutdown signal is received

supervisor:

-module(mod_guild_chapter_sup).
-include("guild_dungeon.hrl").

-behaviour(supervisor).

%% API
-export([start_link/0]).

%% Supervisor callbacks
-export([init/1]).

-define(SERVER, ?MODULE).

%%%===================================================================
%%% API functions
%%%===================================================================

%%--------------------------------------------------------------------
%% @doc
%% Launches this supervisor, linked to the calling process and
%% registered locally under the name given by the SERVER macro.
%% The callback module is this module and init/1 receives `[]'.
%%
%% @spec start_link() -> {ok, Pid} | ignore | {error, Error}
%% @end
%%--------------------------------------------------------------------
-spec start_link() -> {ok, pid()} | ignore | {error, term()}.
start_link() ->
    RegisteredName = {local, ?SERVER},
    InitArgs = [],
    supervisor:start_link(RegisteredName, ?MODULE, InitArgs).

%%%===================================================================
%%% Supervisor callbacks
%%%===================================================================

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Supervisor callback invoked by the freshly started supervisor
%% process. Returns the supervisor flags together with the single
%% child specification used as the template for every dynamically
%% started child.
%%
%% Flags: simple_one_for_one strategy, at most 1000 restarts within
%% 3600 seconds.
%% Child: started through mod_guild_chapter:start_link (extra
%% arguments are appended by supervisor:start_child/2), transient
%% restart, 60000 ms shutdown timeout, worker type.
%%
%% @spec init(Args) -> {ok, {SupFlags, [ChildSpec]}} |
%%                     ignore |
%%                     {error, Reason}
%% @end
%%--------------------------------------------------------------------
init([]) ->
    %% {Strategy, MaxRestarts, MaxSecondsBetweenRestarts}
    SupFlags = {simple_one_for_one, 1000, 3600},

    %% {Id, StartMFA, Restart, Shutdown, Type, Modules}
    ChildSpec = {
        'guild_chapter',
        {'mod_guild_chapter', start_link, []},
        transient,
        60000,
        worker,
        ['mod_guild_chapter']
    },

    {ok, {SupFlags, [ChildSpec]}}.

child:

-module(mod_guild_chapter).

-record(state, {}).

%% @doc Starts an unregistered gen_server for one guild chapter, linked
%% to the caller. GuildId and ChapterId are handed to init/1 as a
%% two-element list.
-spec start_link(term(), term()) -> {ok, pid()} | ignore | {error, term()}.
start_link(GuildId, ChapterId) ->
    InitArgs = [GuildId, ChapterId],
    gen_server:start_link(?MODULE, InitArgs, []).

%% @doc gen_server callback. Returns `ignore' when condition() is true,
%% otherwise starts with an empty #state{} record.
%%
%% NOTE(review): this process is started as a transient child of a
%% simple_one_for_one supervisor. The which_children/1 output recorded
%% alongside this code shows an `undefined' pid entry, which appears to
%% come from the `ignore' return on older OTP releases -- confirm
%% against the OTP version in use before relying on `ignore' here.
init([GuildId, ChapterId]) ->
    case condition() of
        true -> ignore;
        %% Closing delimiter fixed: the tuple must end with `}', not `)'.
        false -> {ok, #state{}}
    end.

...omit other callbacks...

supervisor:which_children(mod_guild_chapter_sup):

[{undefined,<0.9635.0>,worker,[mod_guild_chapter]},
 {undefined,<0.9539.0>,worker,[mod_guild_chapter]},
 {undefined,<0.9475.0>,worker,[mod_guild_chapter]},
 {undefined,<0.9493.0>,worker,[mod_guild_chapter]},
 {undefined,<0.9654.0>,worker,[mod_guild_chapter]},
 {undefined,undefined,worker,[mod_guild_chapter]},
 {undefined,<0.9658.0>,worker,[mod_guild_chapter]},
 {undefined,<0.9517.0>,worker,[mod_guild_chapter]},
 {undefined,<0.9567.0>,worker,[mod_guild_chapter]}]

收到 shutdown 时出现的异常:

2015-07-03 14:56:33 =CRASH REPORT====
  crasher:
    initial call: mod_guild_chapter:init/1
    pid: <0.9475.0>
    registered_name: []
    exception exit: {{function_clause,[{supervisor,'-monitor_dynamic_children/2-fun-1-',[undefined,[100062,10003],{{set,3,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[],[],[],[],[],[<0.9658.0>],[],[],[<0.9517.0>],[],[<0.9567.0>],[],[],[]}}},{dict,0,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}}}],[{file,"supervisor.erl"},{line,992}]},{dict,fold_bucket,3,[{file,"dict.erl"},{line,441}]},{dict,fold_seg,4,[{file,"dict.erl"},{line,437}]},{dict,fold_segs,4,[{file,"dict.erl"},{line,433}]},{supervisor,terminate_dynamic_children,3,[{file,"supervisor.erl"},{line,959}]},{gen_server,terminate,6,[{file,"gen_server.erl"},{line,719}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,239}]}]},[{gen_server,terminate,6,[{file,"gen_server.erl"},{line,744}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,239}]}]}
    ancestors: [mod_guild_chapter_sup,mod_guild_sup,yg_sup,<0.82.0>]
    messages: []
    links: []
    dictionary: []
    trap_exit: true
    status: running
    heap_size: 1598
    stack_size: 27
    reductions: 6586
  neighbours:

如您所见,列表中有一个本不应该存在的 undefined pid。

文档中有两处对此进行了解释,但它们之间存在冲突。

  1. 如果 child 不是 temporary,即使启动函数返回 ignore,child 规范也会由 supervisor 保留:

start defines the function call used to start the child process. It must be a module-function-arguments tuple {M,F,A} used as apply(M,F,A).

The start function must create and link to the child process, and must return {ok,Child} or {ok,Child,Info} where Child is the pid of the child process and Info an arbitrary term which is ignored by the supervisor.

The start function can also return ignore if the child process for some reason cannot be started, in which case the child specification will be kept by the supervisor (unless it is a temporary child) but the non-existing child process will be ignored.

  2. 对于 simple_one_for_one supervisor,如果启动函数返回 ignore,child 不会被添加到 supervisor:

If the child process start function returns ignore, the child specification is added to the supervisor (unless the supervisor is a simple_one_for_one supervisor, see below), the pid is set to undefined and the function returns {ok,undefined}.

In the case of a simple_one_for_one supervisor, when a child process start function returns ignore the functions returns {ok,undefined} and no child is added to the supervisor.

我对文档感到困惑。我选择 transient 重启策略,因为 child 应该在崩溃时重启。但是我怎样才能在这里避免这个异常呢?

好的,我又看了一遍你的问题。文档说,如果 child 的启动函数返回 ignore,则该 child 不会被启动。对于 simple_one_for_one supervisor,child 规范也不会被保留(这是合乎逻辑的,因为在 supervisor 启动时只保存了一个规范)。这意味着下面这一行:

{undefined,undefined,worker,[mod_guild_chapter]},

只是说明没有启动任何 child 进程。因此,该条目不可能是您这个异常的原因。崩溃报告给出了崩溃进程的 pid,即 <0.9475.0>。您可以在 which_children 返回的列表中找到它。这才是需要检查崩溃原因的进程。