elixir:找出进程终止的原因
elixir: find out the reason of process termination
在性能测试期间,我的应用程序终止并显示以下日志:
17:17:28.187 [info] SIGTERM received - shutting down
17:17:28.187 [info] SIGTERM received - shutting down
17:17:28.188 [error] GenServer #PID<0.3707.0> terminating
** (stop) 'stopping because dependent process <0.3703.0> died: shutdown'
Last message: {:EXIT, #PID<0.3703.0>, :shutdown}
17:17:28.189 [error] gen_server <0.3707.0> terminated with reason: "stopping because dependent process <0.3703.0> died: shutdown"
17:17:28.190 [error] CRASH REPORT Process <0.3707.0> with 0 neighbours exited with reason: "stopping because dependent process <0.3703.0> died: shutdown" in gen_server:handle_common_reply/8 line 751
17:17:28.190 [error] Supervisor {<0.3705.0>,amqp_connection_sup} had child connection started with amqp_gen_connection:start_link(<0.3706.0>, {amqp_params_network,<<"publicmq-npperfcom1">>,<<"publicmq-npperfcom1">>,<<"/publicmq-npperfcom1">>,...}) at <0.3707.0> exit with reason "stopping because dependent process <0.3703.0> died: shutdown" in context child_terminated
17:17:28.190 [error] Supervisor {<0.3705.0>,amqp_connection_sup} had child connection started with amqp_gen_connection:start_link(<0.3706.0>, {amqp_params_network,<<"publicmq-npperfcom1">>,<<"publicmq-npperfcom1">>,<<"/publicmq-npperfcom1">>,...}) at <0.3707.0> exit with reason reached_max_restart_intensity in context shutdown
生成与 amqp 连接的代码如下所示:
defmodule MyApp.Events.AmqpTransport do
  @moduledoc """
  GenServer that owns the AMQP channel used to publish events.

  The broker connection is opened asynchronously, right after `init/1`
  returns, by handling a `:connect` message. Whenever opening the connection
  fails, or a linked process exits, a new `:connect` attempt is scheduled
  after `@restart_delay` milliseconds and the state is reset to `nil`.

  Connection settings are read from the `:amqp` entry of the `:events`
  application environment.
  """

  require Logger
  use GenServer
  use AMQP

  @restart_delay 2000 # 2 seconds

  defmodule State do
    @moduledoc false

    @type t :: %__MODULE__{
            exchange: String.t(),
            channel: AMQP.Channel.t(),
            routing_key: String.t(),
            emitter_id: String.t(),
            np_tracking_id: String.t()
          }

    defstruct [:exchange, :channel, :routing_key, :emitter_id, :np_tracking_id]
  end

  @doc """
  Starts the transport and registers it under the module name.

  The argument is ignored.
  """
  def start_link(_) do
    GenServer.start_link(__MODULE__, [], name: __MODULE__)
  end

  # Traps exits and defers the actual connection to the `:connect` message so
  # that `init/1` returns immediately. The state stays `nil` until connected.
  @impl true
  def init(_opts) do
    Process.flag(:trap_exit, true)
    send(self(), :connect)
    {:ok, nil}
  end

  # Opens the connection and a channel, declares the durable topic exchange and
  # builds the state. On a connection error, retries after `@restart_delay`.
  @impl true
  def handle_info(:connect, _state) do
    username = get_conf(:username)
    password = get_conf(:password)
    host = get_conf(:host)
    port = get_conf(:port)
    vhost = String.replace(get_conf(:vhost), "/", "%2f")
    amqp_url = "amqp://#{username}:#{password}@#{host}:#{port}/#{vhost}"

    # The password must never reach the logs, so the URL is logged with the
    # credential masked instead of interpolating `amqp_url`.
    Logger.info(
      "amqp transport connecting to amqp://#{username}:[FILTERED]@#{host}:#{port}/#{vhost}"
    )

    case Connection.open(amqp_url) do
      {:ok, conn} ->
        # NOTE(review): the link is bidirectional, so the connection process is
        # also notified when this process exits. The AMQP library docs suggest
        # `Process.monitor/1` for reconnect handling -- confirm before changing.
        Process.link(conn.pid)

        # Assertive matches: a failure to open the channel or declare the
        # exchange crashes this process.
        {:ok, chan} = Channel.open(conn)
        :ok = AMQP.Exchange.declare(chan, get_conf(:exchange), :topic, durable: true)

        # NOTE(review): these two values come from the `:coups_events` app
        # environment while `get_conf/1` reads `:events` -- confirm intended.
        state = %State{
          exchange: get_conf(:exchange),
          channel: chan,
          routing_key: get_conf(:routing_key),
          emitter_id: Application.fetch_env!(:coups_events, :emitter_id),
          np_tracking_id: Application.fetch_env!(:coups_events, :np_tracking_id)
        }

        {:noreply, state}

      {:error, err} ->
        Logger.error("amqp transport failed\n Err: #{inspect(err)}\n Retrying to connect ...")
        Process.send_after(self(), :connect, @restart_delay)
        {:noreply, nil}
    end
  end

  # Received because exits are trapped. Any linked process exiting is treated
  # as a lost connection: the state is dropped and a reconnect is scheduled.
  def handle_info({:EXIT, pid, reason}, _state) do
    Logger.error("amqp transport failed with #{inspect(reason)}")
    Process.unlink(pid)
    Process.send_after(self(), :connect, @restart_delay)
    {:noreply, nil}
  end

  @impl true
  def handle_cast({:emit, _event}, state) do
    # event publishing
    {:noreply, state}
  end

  # Looks up `key` in the `:amqp` configuration of the `:events` application.
  defp get_conf(key) do
    conf = Application.get_env(:events, :amqp)
    conf[key]
  end
end
问题:
- AmqpTransport 没有捕获退出信号(trap exit)。为什么?
- 在错误日志中我看到了 pids。我可以在那里看到命名进程吗?
- 那里发生了什么使进程死亡?我怎样才能进一步调查细节?
Connection 由 amqp 应用程序自身的监督者(supervisor)通过 amqp_sup:start_connection_sup(AmqpParams3) 启动,并在其内部受到监督
(Connection.open/2 只是简单地委托给 :amqp_connection.start/2)。
将一个进程链接到两个捕获退出的进程是很棘手的,而且通常不是幂等的,这就是为什么 official documentation 建议对底层连接使用 Process.monitor/1,
并走完整的流程,把负责监视的进程也一并重启。
我记得 Andrea Leopardi 曾就相关主题发表过 complaints,但它对我来说一直很管用。
在性能测试期间,我的应用程序终止并显示以下日志:
17:17:28.187 [info] SIGTERM received - shutting down
17:17:28.187 [info] SIGTERM received - shutting down
17:17:28.188 [error] GenServer #PID<0.3707.0> terminating
** (stop) 'stopping because dependent process <0.3703.0> died: shutdown'
Last message: {:EXIT, #PID<0.3703.0>, :shutdown}
17:17:28.189 [error] gen_server <0.3707.0> terminated with reason: "stopping because dependent process <0.3703.0> died: shutdown"
17:17:28.190 [error] CRASH REPORT Process <0.3707.0> with 0 neighbours exited with reason: "stopping because dependent process <0.3703.0> died: shutdown" in gen_server:handle_common_reply/8 line 751
17:17:28.190 [error] Supervisor {<0.3705.0>,amqp_connection_sup} had child connection started with amqp_gen_connection:start_link(<0.3706.0>, {amqp_params_network,<<"publicmq-npperfcom1">>,<<"publicmq-npperfcom1">>,<<"/publicmq-npperfcom1">>,...}) at <0.3707.0> exit with reason "stopping because dependent process <0.3703.0> died: shutdown" in context child_terminated
17:17:28.190 [error] Supervisor {<0.3705.0>,amqp_connection_sup} had child connection started with amqp_gen_connection:start_link(<0.3706.0>, {amqp_params_network,<<"publicmq-npperfcom1">>,<<"publicmq-npperfcom1">>,<<"/publicmq-npperfcom1">>,...}) at <0.3707.0> exit with reason reached_max_restart_intensity in context shutdown
生成与 amqp 连接的代码如下所示:
defmodule MyApp.Events.AmqpTransport do
  @moduledoc """
  GenServer that owns the AMQP channel used to publish events.

  The broker connection is opened asynchronously, right after `init/1`
  returns, by handling a `:connect` message. Whenever opening the connection
  fails, or a linked process exits, a new `:connect` attempt is scheduled
  after `@restart_delay` milliseconds and the state is reset to `nil`.

  Connection settings are read from the `:amqp` entry of the `:events`
  application environment.
  """

  require Logger
  use GenServer
  use AMQP

  @restart_delay 2000 # 2 seconds

  defmodule State do
    @moduledoc false

    @type t :: %__MODULE__{
            exchange: String.t(),
            channel: AMQP.Channel.t(),
            routing_key: String.t(),
            emitter_id: String.t(),
            np_tracking_id: String.t()
          }

    defstruct [:exchange, :channel, :routing_key, :emitter_id, :np_tracking_id]
  end

  @doc """
  Starts the transport and registers it under the module name.

  The argument is ignored.
  """
  def start_link(_) do
    GenServer.start_link(__MODULE__, [], name: __MODULE__)
  end

  # Traps exits and defers the actual connection to the `:connect` message so
  # that `init/1` returns immediately. The state stays `nil` until connected.
  @impl true
  def init(_opts) do
    Process.flag(:trap_exit, true)
    send(self(), :connect)
    {:ok, nil}
  end

  # Opens the connection and a channel, declares the durable topic exchange and
  # builds the state. On a connection error, retries after `@restart_delay`.
  @impl true
  def handle_info(:connect, _state) do
    username = get_conf(:username)
    password = get_conf(:password)
    host = get_conf(:host)
    port = get_conf(:port)
    vhost = String.replace(get_conf(:vhost), "/", "%2f")
    amqp_url = "amqp://#{username}:#{password}@#{host}:#{port}/#{vhost}"

    # The password must never reach the logs, so the URL is logged with the
    # credential masked instead of interpolating `amqp_url`.
    Logger.info(
      "amqp transport connecting to amqp://#{username}:[FILTERED]@#{host}:#{port}/#{vhost}"
    )

    case Connection.open(amqp_url) do
      {:ok, conn} ->
        # NOTE(review): the link is bidirectional, so the connection process is
        # also notified when this process exits. The AMQP library docs suggest
        # `Process.monitor/1` for reconnect handling -- confirm before changing.
        Process.link(conn.pid)

        # Assertive matches: a failure to open the channel or declare the
        # exchange crashes this process.
        {:ok, chan} = Channel.open(conn)
        :ok = AMQP.Exchange.declare(chan, get_conf(:exchange), :topic, durable: true)

        # NOTE(review): these two values come from the `:coups_events` app
        # environment while `get_conf/1` reads `:events` -- confirm intended.
        state = %State{
          exchange: get_conf(:exchange),
          channel: chan,
          routing_key: get_conf(:routing_key),
          emitter_id: Application.fetch_env!(:coups_events, :emitter_id),
          np_tracking_id: Application.fetch_env!(:coups_events, :np_tracking_id)
        }

        {:noreply, state}

      {:error, err} ->
        Logger.error("amqp transport failed\n Err: #{inspect(err)}\n Retrying to connect ...")
        Process.send_after(self(), :connect, @restart_delay)
        {:noreply, nil}
    end
  end

  # Received because exits are trapped. Any linked process exiting is treated
  # as a lost connection: the state is dropped and a reconnect is scheduled.
  def handle_info({:EXIT, pid, reason}, _state) do
    Logger.error("amqp transport failed with #{inspect(reason)}")
    Process.unlink(pid)
    Process.send_after(self(), :connect, @restart_delay)
    {:noreply, nil}
  end

  @impl true
  def handle_cast({:emit, _event}, state) do
    # event publishing
    {:noreply, state}
  end

  # Looks up `key` in the `:amqp` configuration of the `:events` application.
  defp get_conf(key) do
    conf = Application.get_env(:events, :amqp)
    conf[key]
  end
end
问题:
- AmqpTransport 没有捕获退出信号(trap exit)。为什么?
- 在错误日志中我看到了 pids。我可以在那里看到命名进程吗?
- 那里发生了什么使进程死亡?我怎样才能进一步调查细节?
Connection 由 amqp 应用程序自身的监督者(supervisor)通过 amqp_sup:start_connection_sup(AmqpParams3) 启动,并在其内部受到监督
(Connection.open/2 只是简单地委托给 :amqp_connection.start/2)。
将一个进程链接到两个捕获退出的进程是很棘手的,而且通常不是幂等的,这就是为什么 official documentation 建议对底层连接使用 Process.monitor/1,
并走完整的流程,把负责监视的进程也一并重启。
我记得 Andrea Leopardi 曾就相关主题发表过 complaints,但它对我来说一直很管用。