如何在 PostgreSQL 中计算表中部分值(值的子集)的平均值?
How to find the average of a subset of values from a table in PostgreSQL?
请原谅,我是 PostgreSQL 的新手,我的任务是更新某些表中的某些字段。其中一个字段是平均决策时间,如下所示:
-- Existing expression: takes AVG(tdrm.total_processing_time), appends ' millisecond'
-- and casts the text to interval, then formats it as MI:SS.MS.
-- Falls back to '00:00.000' when COUNT(tdrm.dbid) is 0.
CASE WHEN COUNT(tdrm.dbid) > 0
THEN TO_CHAR((AVG(tdrm.total_processing_time) || ' millisecond')::interval, 'MI:SS.MS')
ELSE '00:00.000'
END AS average_decision_time
其中 COUNT(tdrm.dbid)
是 items_seen
。此逻辑的问题在于,我们希望在计算平均值时排除中止标志等于 'AF_ABORT'
的项目的总处理时间。
这就是我想要做的:
-- Attempted fix, kept as written because PostgreSQL rejects it:
-- COUNT(...) is nested inside AVG(...), and aggregate function calls cannot be nested.
CASE WHEN COUNT(tdrm.dbid) > 0
THEN TO_CHAR((AVG(COUNT(CASE WHEN tdrm.tdr_abort_flag!=AF_ABORT THEN tdrm.total_processing_time END)) || ' millisecond')::interval, 'MI:SS.MS')
ELSE '00:00.000'
END AS average_decision_time
但我收到以下错误:
ERROR: aggregate function calls cannot be nested
LINE 64: THEN TO_CHAR((AVG(COUNT(CASE WHEN tdrm.tdr_abort_flag!=A...
我走在正确的轨道上还是有更简单的方法?
完整 SQL 如下:
-- Operator statistics grouped by user, login/logout registration event and module,
-- with the agent events that fall inside each login..logout window attached.
--
-- Fixed here: the missing comma between "END AS average_decision_time" and
-- "v2_module.dbid AS v2_gem_dbid" (a syntax error in the select list).
--
-- NOTE(review): the query still references names that its FROM clauses never
-- introduce; they look like anonymisation leftovers and must be confirmed
-- against the real schema before running:
--   * item_tdr is referenced, but the joined table is bag_tdr;
--   * v2_module.* is referenced, but that table is joined under the alias tdrm
--     (v2_general_equipment_module is joined and never used);
--   * the outer join uses s.v2_dbid, but the subquery exports v2_gem_dbid.
SELECT s.*,
       -- Short display labels for two known event codes; any other code is passed through.
       CASE WHEN agent_event.event_code = 'data_download' THEN 'DL'
            WHEN agent_event.event_code = 'mode' THEN 'Mode'
            ELSE agent_event.event_code
       END AS userAction
FROM
(
    -- Single-row CTE exposing named integer constants; it is cross-joined below so
    -- every constant can be referenced as a bare column name.
    WITH report_constants AS (
        -- Decisions from DetectionReport.h
        SELECT
            0::int as AD_UNKNOWN,
            1::int as AD_ALARM,
            2::int as AD_CLEAR,
            -- Flags from DetectionReport.h
            0::int as AF_UNKNOWN,
            1::int as AF_ABORT,
            2::int as AF_SUCCESS,
            -- UI values for Decisions are DIFFERENT
            0::int as UI_AD_ALL,
            1::int as UI_AD_CLEAR,
            2::int as UI_AD_ALARM,
            3::int as UI_AD_UNKNOWN,
            --
            0::int as AGENT_TYPE_SCANNER,
            1::int as AGENT_TYPE_OSR,
            2::int as AGENT_TYPE_DIVERTER,
            3::int as AGENT_TYPE_TIP,
            4::int as AGENT_TYPE_SEARCH,
            -- Operation Mode from Module.h
            0::int as OPERATION_MODE_UNKNOWN,
            1::int as OPERATION_MODE_SCAN,
            2::int as OPERATION_MODE_OTHER
    )
    SELECT
        nss_user.username AS user_name,
        reg_login.action_time AS login_action_time,
        reg_logout.action_time AS logout_action_time,
        to_char(reg_login.action_time, 'MM-DD-YYYY') AS login_date,
        to_char(reg_login.action_time, 'HH24:MI:SS') AS login_time,
        -- Logout may be missing (LEFT JOIN below); show an empty string instead of NULL.
        CASE WHEN reg_logout.action_time IS NULL THEN '' ELSE
            to_char(reg_logout.action_time, 'MM-DD-YYYY') END AS logout_date,
        CASE WHEN reg_logout.action_time IS NULL THEN '' ELSE
            to_char(reg_logout.action_time, 'HH24:MI:SS') END AS logout_time,
        -- Strip the leading 'Level ' prefix (6 characters) when present.
        CASE WHEN user_level.name LIKE 'Level %' THEN SUBSTRING(user_level.name from 7) ELSE user_level.name END AS userAccess,
        COUNT(tdrm.dbid) AS items_seen,
        -- Percentage of seen items that succeeded with a CLEAR decision; 0.00 when nothing was seen.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN ROUND(100.0 * COUNT(CASE WHEN tdrm.tdr_abort_flag=AF_SUCCESS
                 AND tdrm.tdr_alarm_decision=AD_CLEAR THEN 1 END) / COUNT(tdrm.dbid), 2)
             ELSE 0.00
        END AS clear_rate,
        -- Items with no decision: succeeded with an UNKNOWN decision, or aborted.
        COUNT(CASE WHEN (tdrm.tdr_abort_flag=AF_SUCCESS
            AND tdrm.tdr_alarm_decision=AD_UNKNOWN)
            OR tdrm.tdr_abort_flag=AF_ABORT THEN 1 END) AS operator_timeout,
        -- Percentage of seen items that succeeded with an ALARM decision.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN ROUND(100.0 * COUNT(CASE WHEN tdrm.tdr_abort_flag=AF_SUCCESS
                 AND tdrm.tdr_alarm_decision=AD_ALARM THEN 1 END) / COUNT(tdrm.dbid), 2)
             ELSE 0.00
        END AS suspect_rate,
        -- Same condition as operator_timeout, expressed as a percentage of seen items.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN ROUND(100.0 * COUNT(CASE WHEN
                 (tdrm.tdr_abort_flag=AF_SUCCESS AND tdrm.tdr_alarm_decision=AD_UNKNOWN)
                 OR tdrm.tdr_abort_flag=AF_ABORT THEN 1 END) / COUNT(tdrm.dbid), 2)
             ELSE 0.00
        END AS operatorNoDecisionRate,
        -- Average processing time over non-aborted items only: the CASE yields NULL for
        -- aborted rows and AVG ignores NULL inputs. The value is treated as milliseconds.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN TO_CHAR((AVG(CASE WHEN tdrm.tdr_abort_flag!=AF_ABORT THEN (tdrm.total_processing_time) END) || ' millisecond')::interval, 'MI:SS.MS')
             ELSE '00:00.000'
        END AS average_decision_time,
        v2_module.dbid AS v2_gem_dbid
    FROM report_constants CROSS JOIN auth_event
    -- presumably event_type 3 = login and 4 = logout, judging by the aliases -- TODO confirm
    INNER JOIN registration_event AS reg_login
        ON reg_login.credential_id=auth_event.credential_id
        AND reg_login.event_type=3
    LEFT OUTER JOIN registration_event AS reg_logout
        ON reg_logout.credential_id=auth_event.credential_id
        AND reg_logout.event_type=4
    INNER JOIN nss_user ON nss_user.dbid=auth_event.nss_user_dbid
    INNER JOIN user_level ON user_level.dbid=nss_user.user_level_dbid
    LEFT OUTER JOIN bag_tdr ON nss_user.dbid=bag_tdr.author_user_dbid
        AND (item_tdr.agent_type=AGENT_TYPE_OSR OR
             item_tdr.agent_type=AGENT_TYPE_SEARCH)
        AND item_tdr.author_credential_id=auth_event.credential_id
    LEFT OUTER JOIN v2_module AS tdrm ON
        item_tdr.v2_module_dbid=tdrm.dbid
    LEFT OUTER JOIN v2_general_equipment_module
        ON v2_general_equipment_module.dbid=reg_login.v2_gem_dbid
    WHERE auth_event.credential_id IS NOT NULL
      AND auth_event.auth_event_type=1
      -- $P{...} are parameters substituted by the reporting tool before execution.
      AND ($P{userid} = 'ALL' OR $P{userid} = nss_user.username)
      -- NOTE(review): these predicates test columns of left-joined tables in WHERE, so
      -- NULL-extended rows (sessions without items) are discarded -- confirm intended.
      AND item_tdr.created_date >= $P{fromdate}
      AND item_tdr.created_date <= $P{todate}
      AND v2_module.operation_mode != OPERATION_MODE_OTHER
    GROUP BY nss_user.username, user_level.name, reg_login.agent_type,
             reg_login.action_time, reg_logout.action_time,
             v2_module.dbid
) s
LEFT OUTER JOIN agent_event
    ON s.v2_dbid=agent_event.v2_dbid
    -- Keep only events inside the session window; an open session (no logout) has no upper bound.
    AND agent_event.event_timestamp >= s.login_action_time
    AND (s.logout_action_time IS NULL OR agent_event.event_timestamp <= s.logout_action_time)
ORDER BY s.login_action_time
您应该能够通过一个简单的子查询来实现这一点:该子查询重新查询该表,并在计算平均值时使用 WHERE
子句过滤掉中止的记录。像这样:
-- Average decision time (MI:SS.MS) computed by a scalar subquery over non-aborted rows.
-- The WHERE clause already removes aborted rows, so no CASE is needed inside AVG;
-- the join to the single-row report_constants CTE is spelled out as CROSS JOIN.
-- NOTE(review): the subquery is not correlated with the outer query, so it averages
-- every non-aborted row of v2_module rather than only the rows of the current
-- group -- confirm that is what is wanted before using it.
CASE WHEN COUNT(tdrm.dbid) > 0
     THEN TO_CHAR((SELECT (AVG(tdrm_subq.total_processing_time) || ' millisecond')
                   FROM v2_module AS tdrm_subq
                   CROSS JOIN report_constants
                   WHERE tdrm_subq.tdr_abort_flag != AF_ABORT)::interval,
                  'MI:SS.MS')
     ELSE '00:00.000'
END AS average_decision_time
we want to exclude the total processing time from the average for items that have an abort flag equal to 'AF_ABORT'.
-- Average decision time (MI:SS.MS) with aborted items left out of the average.
-- AF_ABORT is referenced as the integer constant column that the full query exposes
-- through its report_constants CTE; that query compares tdr_abort_flag with those
-- integer constants, so the quoted string 'AF_ABORT' would not be a valid value here.
CASE WHEN count(tdrm.dbid) > 0
     THEN to_char(avg(tdrm.total_processing_time)
                    FILTER (WHERE tdrm.tdr_abort_flag IS DISTINCT FROM AF_ABORT)  -- ①, ②
                  * interval '1 millisecond'                                       -- ③
                , 'MI:SS.MS')
     ELSE '00:00.000'
END AS average_decision_time
① 实现过滤器的关键元素是聚合 FILTER
子句。参见:
② 如果tdrm.tdr_abort_flag
可以为NULL(缺少信息),我们需要tdrm.tdr_abort_flag IS DISTINCT FROM 'AF_ABORT'
。否则我们可以简化为 tdrm.tdr_abort_flag <> 'AF_ABORT'
.
③ 乘法比连接加转换要快得多。
但是,像这样添加 FILTER
之后,表达式仍然有可能产生 NULL 值。你的需求描述不够明确。你可能真正想要的是:
total_processing_time
的平均值,其中 tdr_abort_flag <> 'AF_ABORT'
。如果出于任何原因结果为 NULL
,则默认为 0
:
-- Average over non-aborted items; any NULL result (no qualifying rows) becomes '00:00.000'.
-- AF_ABORT is the integer constant column from the full query's report_constants CTE,
-- not a quoted string: tdr_abort_flag is compared with integer constants there.
COALESCE(to_char(avg(tdrm.total_processing_time) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT)
                 * interval '1 millisecond'
               , 'MI:SS.MS')
       , '00:00.000') AS average_decision_time
或:
total_processing_time
的平均值,其中 tdr_abort_flag <> 'AF_ABORT'
。但前提是 count(tdrm.dbid) > 0
。如果由于任何原因结果为 NULL,则默认为 0
:
-- Average over non-aborted items, evaluated only when at least one item was counted;
-- a NULL average (e.g. every counted item was aborted) still falls back to '00:00.000'.
-- AF_ABORT is the integer constant column from the full query's report_constants CTE,
-- not a quoted string: tdr_abort_flag is compared with integer constants there.
CASE WHEN count(tdrm.dbid) > 0
     THEN COALESCE(to_char(avg(tdrm.total_processing_time) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT)
                           * interval '1 millisecond'
                         , 'MI:SS.MS')
                 , '00:00.000')
     ELSE '00:00.000'
END AS average_decision_time
或:
total_processing_time
的平均值,其中 tdr_abort_flag <> 'AF_ABORT'
。但前提是 count(tdrm.dbid) > 0
其中 tdr_abort_flag <> 'AF_ABORT'
。否则默认为 0
-- Average over non-aborted items, guarded by a count of those same non-aborted items,
-- so the ELSE branch covers both "no items" and "only aborted items".
-- AF_ABORT is the integer constant column from the full query's report_constants CTE,
-- not a quoted string: tdr_abort_flag is compared with integer constants there.
CASE WHEN count(tdrm.dbid) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT) > 0
     THEN to_char(avg(tdrm.total_processing_time) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT)
                  * interval '1 millisecond'
                , 'MI:SS.MS')
     ELSE '00:00.000'
END AS average_decision_time
您提到您是“SQL 的新手”。让我补充一点:对问题给出清晰明确的定义,就已经完成了 50% 以上的解决方案。这在许多领域都成立,在 SQL 中尤其如此。
除了子查询之外,还应该提到:这正是窗口函数(Window Functions)
所针对的情况。
请原谅,我是 PostgreSQL 的新手,我的任务是更新某些表中的某些字段。其中一个字段是平均决策时间,如下所示:
-- Existing expression: takes AVG(tdrm.total_processing_time), appends ' millisecond'
-- and casts the text to interval, then formats it as MI:SS.MS.
-- Falls back to '00:00.000' when COUNT(tdrm.dbid) is 0.
CASE WHEN COUNT(tdrm.dbid) > 0
THEN TO_CHAR((AVG(tdrm.total_processing_time) || ' millisecond')::interval, 'MI:SS.MS')
ELSE '00:00.000'
END AS average_decision_time
其中 COUNT(tdrm.dbid)
是 items_seen
。此逻辑的问题在于,我们希望在计算平均值时排除中止标志等于 'AF_ABORT' 的项目的总处理时间。
这就是我想要做的:
-- Attempted fix, kept as written because PostgreSQL rejects it:
-- COUNT(...) is nested inside AVG(...), and aggregate function calls cannot be nested.
CASE WHEN COUNT(tdrm.dbid) > 0
THEN TO_CHAR((AVG(COUNT(CASE WHEN tdrm.tdr_abort_flag!=AF_ABORT THEN tdrm.total_processing_time END)) || ' millisecond')::interval, 'MI:SS.MS')
ELSE '00:00.000'
END AS average_decision_time
但我收到以下错误:
ERROR: aggregate function calls cannot be nested LINE 64: THEN TO_CHAR((AVG(COUNT(CASE WHEN tdrm.tdr_abort_flag!=A...
我走在正确的轨道上还是有更简单的方法?
完整 SQL 如下:
-- Operator statistics grouped by user, login/logout registration event and module,
-- with the agent events that fall inside each login..logout window attached.
--
-- Fixed here: the missing comma between "END AS average_decision_time" and
-- "v2_module.dbid AS v2_gem_dbid" (a syntax error in the select list).
--
-- NOTE(review): the query still references names that its FROM clauses never
-- introduce; they look like anonymisation leftovers and must be confirmed
-- against the real schema before running:
--   * item_tdr is referenced, but the joined table is bag_tdr;
--   * v2_module.* is referenced, but that table is joined under the alias tdrm
--     (v2_general_equipment_module is joined and never used);
--   * the outer join uses s.v2_dbid, but the subquery exports v2_gem_dbid.
SELECT s.*,
       -- Short display labels for two known event codes; any other code is passed through.
       CASE WHEN agent_event.event_code = 'data_download' THEN 'DL'
            WHEN agent_event.event_code = 'mode' THEN 'Mode'
            ELSE agent_event.event_code
       END AS userAction
FROM
(
    -- Single-row CTE exposing named integer constants; it is cross-joined below so
    -- every constant can be referenced as a bare column name.
    WITH report_constants AS (
        -- Decisions from DetectionReport.h
        SELECT
            0::int as AD_UNKNOWN,
            1::int as AD_ALARM,
            2::int as AD_CLEAR,
            -- Flags from DetectionReport.h
            0::int as AF_UNKNOWN,
            1::int as AF_ABORT,
            2::int as AF_SUCCESS,
            -- UI values for Decisions are DIFFERENT
            0::int as UI_AD_ALL,
            1::int as UI_AD_CLEAR,
            2::int as UI_AD_ALARM,
            3::int as UI_AD_UNKNOWN,
            --
            0::int as AGENT_TYPE_SCANNER,
            1::int as AGENT_TYPE_OSR,
            2::int as AGENT_TYPE_DIVERTER,
            3::int as AGENT_TYPE_TIP,
            4::int as AGENT_TYPE_SEARCH,
            -- Operation Mode from Module.h
            0::int as OPERATION_MODE_UNKNOWN,
            1::int as OPERATION_MODE_SCAN,
            2::int as OPERATION_MODE_OTHER
    )
    SELECT
        nss_user.username AS user_name,
        reg_login.action_time AS login_action_time,
        reg_logout.action_time AS logout_action_time,
        to_char(reg_login.action_time, 'MM-DD-YYYY') AS login_date,
        to_char(reg_login.action_time, 'HH24:MI:SS') AS login_time,
        -- Logout may be missing (LEFT JOIN below); show an empty string instead of NULL.
        CASE WHEN reg_logout.action_time IS NULL THEN '' ELSE
            to_char(reg_logout.action_time, 'MM-DD-YYYY') END AS logout_date,
        CASE WHEN reg_logout.action_time IS NULL THEN '' ELSE
            to_char(reg_logout.action_time, 'HH24:MI:SS') END AS logout_time,
        -- Strip the leading 'Level ' prefix (6 characters) when present.
        CASE WHEN user_level.name LIKE 'Level %' THEN SUBSTRING(user_level.name from 7) ELSE user_level.name END AS userAccess,
        COUNT(tdrm.dbid) AS items_seen,
        -- Percentage of seen items that succeeded with a CLEAR decision; 0.00 when nothing was seen.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN ROUND(100.0 * COUNT(CASE WHEN tdrm.tdr_abort_flag=AF_SUCCESS
                 AND tdrm.tdr_alarm_decision=AD_CLEAR THEN 1 END) / COUNT(tdrm.dbid), 2)
             ELSE 0.00
        END AS clear_rate,
        -- Items with no decision: succeeded with an UNKNOWN decision, or aborted.
        COUNT(CASE WHEN (tdrm.tdr_abort_flag=AF_SUCCESS
            AND tdrm.tdr_alarm_decision=AD_UNKNOWN)
            OR tdrm.tdr_abort_flag=AF_ABORT THEN 1 END) AS operator_timeout,
        -- Percentage of seen items that succeeded with an ALARM decision.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN ROUND(100.0 * COUNT(CASE WHEN tdrm.tdr_abort_flag=AF_SUCCESS
                 AND tdrm.tdr_alarm_decision=AD_ALARM THEN 1 END) / COUNT(tdrm.dbid), 2)
             ELSE 0.00
        END AS suspect_rate,
        -- Same condition as operator_timeout, expressed as a percentage of seen items.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN ROUND(100.0 * COUNT(CASE WHEN
                 (tdrm.tdr_abort_flag=AF_SUCCESS AND tdrm.tdr_alarm_decision=AD_UNKNOWN)
                 OR tdrm.tdr_abort_flag=AF_ABORT THEN 1 END) / COUNT(tdrm.dbid), 2)
             ELSE 0.00
        END AS operatorNoDecisionRate,
        -- Average processing time over non-aborted items only: the CASE yields NULL for
        -- aborted rows and AVG ignores NULL inputs. The value is treated as milliseconds.
        CASE WHEN COUNT(tdrm.dbid) > 0
             THEN TO_CHAR((AVG(CASE WHEN tdrm.tdr_abort_flag!=AF_ABORT THEN (tdrm.total_processing_time) END) || ' millisecond')::interval, 'MI:SS.MS')
             ELSE '00:00.000'
        END AS average_decision_time,
        v2_module.dbid AS v2_gem_dbid
    FROM report_constants CROSS JOIN auth_event
    -- presumably event_type 3 = login and 4 = logout, judging by the aliases -- TODO confirm
    INNER JOIN registration_event AS reg_login
        ON reg_login.credential_id=auth_event.credential_id
        AND reg_login.event_type=3
    LEFT OUTER JOIN registration_event AS reg_logout
        ON reg_logout.credential_id=auth_event.credential_id
        AND reg_logout.event_type=4
    INNER JOIN nss_user ON nss_user.dbid=auth_event.nss_user_dbid
    INNER JOIN user_level ON user_level.dbid=nss_user.user_level_dbid
    LEFT OUTER JOIN bag_tdr ON nss_user.dbid=bag_tdr.author_user_dbid
        AND (item_tdr.agent_type=AGENT_TYPE_OSR OR
             item_tdr.agent_type=AGENT_TYPE_SEARCH)
        AND item_tdr.author_credential_id=auth_event.credential_id
    LEFT OUTER JOIN v2_module AS tdrm ON
        item_tdr.v2_module_dbid=tdrm.dbid
    LEFT OUTER JOIN v2_general_equipment_module
        ON v2_general_equipment_module.dbid=reg_login.v2_gem_dbid
    WHERE auth_event.credential_id IS NOT NULL
      AND auth_event.auth_event_type=1
      -- $P{...} are parameters substituted by the reporting tool before execution.
      AND ($P{userid} = 'ALL' OR $P{userid} = nss_user.username)
      -- NOTE(review): these predicates test columns of left-joined tables in WHERE, so
      -- NULL-extended rows (sessions without items) are discarded -- confirm intended.
      AND item_tdr.created_date >= $P{fromdate}
      AND item_tdr.created_date <= $P{todate}
      AND v2_module.operation_mode != OPERATION_MODE_OTHER
    GROUP BY nss_user.username, user_level.name, reg_login.agent_type,
             reg_login.action_time, reg_logout.action_time,
             v2_module.dbid
) s
LEFT OUTER JOIN agent_event
    ON s.v2_dbid=agent_event.v2_dbid
    -- Keep only events inside the session window; an open session (no logout) has no upper bound.
    AND agent_event.event_timestamp >= s.login_action_time
    AND (s.logout_action_time IS NULL OR agent_event.event_timestamp <= s.logout_action_time)
ORDER BY s.login_action_time
您应该能够通过一个简单的子查询来实现这一点:该子查询重新查询该表,并在计算平均值时使用 WHERE
子句过滤掉中止的记录。像这样:
-- Average decision time (MI:SS.MS) computed by a scalar subquery over non-aborted rows.
-- The WHERE clause already removes aborted rows, so no CASE is needed inside AVG;
-- the join to the single-row report_constants CTE is spelled out as CROSS JOIN.
-- NOTE(review): the subquery is not correlated with the outer query, so it averages
-- every non-aborted row of v2_module rather than only the rows of the current
-- group -- confirm that is what is wanted before using it.
CASE WHEN COUNT(tdrm.dbid) > 0
     THEN TO_CHAR((SELECT (AVG(tdrm_subq.total_processing_time) || ' millisecond')
                   FROM v2_module AS tdrm_subq
                   CROSS JOIN report_constants
                   WHERE tdrm_subq.tdr_abort_flag != AF_ABORT)::interval,
                  'MI:SS.MS')
     ELSE '00:00.000'
END AS average_decision_time
we want to exclude the total processing time from the average for items that have an abort flag equal to 'AF_ABORT'.
-- Average decision time (MI:SS.MS) with aborted items left out of the average.
-- AF_ABORT is referenced as the integer constant column that the full query exposes
-- through its report_constants CTE; that query compares tdr_abort_flag with those
-- integer constants, so the quoted string 'AF_ABORT' would not be a valid value here.
CASE WHEN count(tdrm.dbid) > 0
     THEN to_char(avg(tdrm.total_processing_time)
                    FILTER (WHERE tdrm.tdr_abort_flag IS DISTINCT FROM AF_ABORT)  -- ①, ②
                  * interval '1 millisecond'                                       -- ③
                , 'MI:SS.MS')
     ELSE '00:00.000'
END AS average_decision_time
① 实现过滤器的关键元素是聚合 FILTER
子句。参见:
② 如果tdrm.tdr_abort_flag
可以为NULL(缺少信息),我们需要tdrm.tdr_abort_flag IS DISTINCT FROM 'AF_ABORT'
。否则我们可以简化为 tdrm.tdr_abort_flag <> 'AF_ABORT'
.
③ 乘法比连接加转换要快得多。
但是,像这样添加 FILTER
之后,表达式仍然有可能产生 NULL 值。你的需求描述不够明确。你可能真正想要的是:
total_processing_time
的平均值,其中 tdr_abort_flag <> 'AF_ABORT'
。如果出于任何原因结果为 NULL
,则默认为 0
:
-- Average over non-aborted items; any NULL result (no qualifying rows) becomes '00:00.000'.
-- AF_ABORT is the integer constant column from the full query's report_constants CTE,
-- not a quoted string: tdr_abort_flag is compared with integer constants there.
COALESCE(to_char(avg(tdrm.total_processing_time) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT)
                 * interval '1 millisecond'
               , 'MI:SS.MS')
       , '00:00.000') AS average_decision_time
或:
total_processing_time
的平均值,其中 tdr_abort_flag <> 'AF_ABORT'
。但前提是 count(tdrm.dbid) > 0
。如果由于任何原因结果为 NULL,则默认为 0
:
-- Average over non-aborted items, evaluated only when at least one item was counted;
-- a NULL average (e.g. every counted item was aborted) still falls back to '00:00.000'.
-- AF_ABORT is the integer constant column from the full query's report_constants CTE,
-- not a quoted string: tdr_abort_flag is compared with integer constants there.
CASE WHEN count(tdrm.dbid) > 0
     THEN COALESCE(to_char(avg(tdrm.total_processing_time) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT)
                           * interval '1 millisecond'
                         , 'MI:SS.MS')
                 , '00:00.000')
     ELSE '00:00.000'
END AS average_decision_time
或:
total_processing_time
的平均值,其中 tdr_abort_flag <> 'AF_ABORT'
。但前提是 count(tdrm.dbid) > 0
其中 tdr_abort_flag <> 'AF_ABORT'
。否则默认为 0
-- Average over non-aborted items, guarded by a count of those same non-aborted items,
-- so the ELSE branch covers both "no items" and "only aborted items".
-- AF_ABORT is the integer constant column from the full query's report_constants CTE,
-- not a quoted string: tdr_abort_flag is compared with integer constants there.
CASE WHEN count(tdrm.dbid) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT) > 0
     THEN to_char(avg(tdrm.total_processing_time) FILTER (WHERE tdrm.tdr_abort_flag <> AF_ABORT)
                  * interval '1 millisecond'
                , 'MI:SS.MS')
     ELSE '00:00.000'
END AS average_decision_time
您提到您是“SQL 的新手”。让我补充一点:对问题给出清晰明确的定义,就已经完成了 50% 以上的解决方案。这在许多领域都成立,在 SQL 中尤其如此。
除了子查询之外,还应该提到:这正是窗口函数(Window Functions)
所针对的情况。