From f5a820cb102c9d9ae60a3d340960a053f0522cca Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 29 Mar 2024 13:09:08 +0200 Subject: [PATCH 1/3] fix(emqx_mgmt): catch OOM shutdown exits properly when calling a client conn process The exit reason is expected to include gen_server `Location`: `{{shutdown, OOMInfo}, Location}`. --- apps/emqx_management/src/emqx_mgmt.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 35908d3bd..df0450395 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -711,5 +711,7 @@ call_conn(ConnMod, Pid, Req) -> exit:R when R =:= shutdown; R =:= normal -> {error, shutdown}; exit:{R, _} when R =:= shutdown; R =:= noproc -> + {error, shutdown}; + exit:{{shutdown, _OOMInfo}, _Location} -> {error, shutdown} end. From 42af1f9d634b1408f537c233a7d1fe9801e7950b Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 29 Mar 2024 13:29:19 +0200 Subject: [PATCH 2/3] fix: handle internal timeout errors in client Mqueue/Inflight APIs --- apps/emqx_management/src/emqx_mgmt.erl | 14 +++++++++++++- apps/emqx_management/src/emqx_mgmt_api_clients.erl | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index df0450395..bc194f03e 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -713,5 +713,17 @@ call_conn(ConnMod, Pid, Req) -> exit:{R, _} when R =:= shutdown; R =:= noproc -> {error, shutdown}; exit:{{shutdown, _OOMInfo}, _Location} -> - {error, shutdown} + {error, shutdown}; + exit:timeout -> + ?SLOG( + warning, + #{ + msg => "call_client_connection_process_timeout", + request => Req, + pid => Pid, + module => ConnMod, + stacktrace => erlang:process_info(Pid, current_stacktrace) + } + ), + {error, timeout} end. 
diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index dd65c1245..262faf87f 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -1232,6 +1232,8 @@ list_client_msgs(MsgType, ClientID, QString) -> code => 'NOT_IMPLEMENTED', message => <<"API not implemented for persistent sessions">> }}; + {error, Reason} -> + ?INTERNAL_ERROR(Reason); {Msgs, Meta = #{}} when is_list(Msgs) -> format_msgs_resp(MsgType, Msgs, Meta, QString) end From 6cdf876684932d81f8bd063c85a175b5d9cc455c Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 29 Mar 2024 13:39:36 +0200 Subject: [PATCH 3/3] chore: add changelog --- changes/ce/fix-12814.en.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changes/ce/fix-12814.en.md diff --git a/changes/ce/fix-12814.en.md b/changes/ce/fix-12814.en.md new file mode 100644 index 000000000..f84025561 --- /dev/null +++ b/changes/ce/fix-12814.en.md @@ -0,0 +1,4 @@ +Handle several errors in `/clients/{clientid}/mqueue_messages` and `/clients/{clientid}/inflight_messages` APIs: + +- Internal timeout, which means that EMQX failed to get the list of Inflight/Mqueue messages within the default timeout of 5 s. This error may occur when the system is under heavy load. The API will return a 500 `{"code":"INTERNAL_ERROR","message":"timeout"}` response and log additional details. +- Client shutdown. The error may occur if the client connection is shut down during the API call. The API will return a 404 `{"code": "CLIENT_SHUTDOWN", "message": "Client connection has been shutdown"}` response in this case.