SQL - 使用 union 按 2 列或更多列进行分组计数需要 2 秒以上
SQL - Group count by 2 or more columns using union takes more than 2 seconds
给定一个表,我们称它为 performance6a,
其表结构如下:
student_id | math | history | language | science
table content - performance6a
我需要编写一个生成如下报告的查询 -
expected query output
分组计数基于两个维度:在本例中,先按是否通过(PASS/FAIL),再按科目。
请注意,这些操作是在无法规范化或重新设计的系统表上执行的。两个维度的取值数量都可能增加(状态维度,即 pass/fail,最多可增加到 100 个;科目维度最多可增加到约 5k 个)。
我试过的:
-- One aggregate SELECT per (status, subject) pair, each reading PERFORMANCE6A
-- separately, stacked into a single result set.
-- UNION ALL instead of UNION: every branch returns exactly one row carrying a
-- unique (STATUS, SUBJECT) literal pair, so duplicate elimination can never
-- remove anything and only adds work.
-- The SUBJECT and CNT aliases are repeated in every branch so the combined
-- result has named columns.
-- COUNT(ID) counts rows with a non-NULL ID that satisfy the branch filter; rows
-- whose score is NULL match neither "> 30" nor "<= 30" and are not counted.
-- NOTE(review): the table description lists student_id, not ID -- confirm the
-- counted column name against the real table.
(SELECT 'PASS' AS STATUS, 'MATH' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE MATH > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'MATH' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE MATH <= 30) UNION ALL
(SELECT 'PASS' AS STATUS, 'HISTORY' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE HISTORY > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'HISTORY' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE HISTORY <= 30) UNION ALL
(SELECT 'PASS' AS STATUS, 'LANGUAGE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE LANGUAGE > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'LANGUAGE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE LANGUAGE <= 30) UNION ALL
(SELECT 'PASS' AS STATUS, 'SCIENCE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE SCIENCE > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'SCIENCE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE SCIENCE <= 30);
这个查询能给出正确的结果,但执行时间超过 2 秒。我正在寻找优化该查询的思路。
如果先逆透视(unpivot)再聚合,可能会更快:
-- Unpivot the four score columns into (subject, score) rows, then classify and
-- count them in a single aggregation.
-- NULL scores are filtered out before grouping: without the filter the ELSE
-- branch would report a NULL score as 'FAIL', whereas separate "score > 30" /
-- "score <= 30" filters count such a row in neither bucket.
-- A (status, subject) combination with no matching rows produces no output row
-- here (a GROUP BY emits only groups that exist), not a row with a zero count.
select (case when score > 30 then 'PASS' else 'FAIL' end) as status,
       subject,
       count(*) as cnt
from ((select 'math' as subject, math as score from PERFORMANCE6A) union all
      (select 'history' as subject, history as score from PERFORMANCE6A) union all
      (select 'language' as subject, language as score from PERFORMANCE6A) union all
      (select 'science' as subject, science as score from PERFORMANCE6A)
     ) ss
where score is not null
group by (case when score > 30 then 'PASS' else 'FAIL' end), subject;
试试这个:
-- Counts PASS/FAIL per subject by pairing every row with every subject name.
-- PERFORMANCE6A below is inline sample data under the same name as the table;
-- remove that first CTE to run the query against the actual table.
WITH PERFORMANCE6A (STUDENT_ID, MATH, HISTORY, LANGUAGE, SCIENCE) AS
(
VALUES
  (1, 23, 42, 40, 35)
, (2, 45, 21, 25, 36)
, (3, 32, 12, 15, 27)
, (4, 49, 45, 38, 31)
, (5, 50, 43, 37, 41)
)
-- One row per (student row, subject): the simple CASE picks the score column
-- that corresponds to the subject name supplied by the cross join.
, UNPIVOTED (NAME, SCORE) AS
(
SELECT
  D.NAME
, CASE D.NAME
    WHEN 'MATH'     THEN P.MATH
    WHEN 'HISTORY'  THEN P.HISTORY
    WHEN 'LANGUAGE' THEN P.LANGUAGE
    WHEN 'SCIENCE'  THEN P.SCIENCE
  END
FROM PERFORMANCE6A P
CROSS JOIN (VALUES 'MATH', 'HISTORY', 'LANGUAGE', 'SCIENCE') D (NAME)
)
-- The pass threshold is applied in one place, after unpivoting.
-- NULL scores are excluded so they are not reported as 'FAIL' by the ELSE branch.
-- With the sample rows above the result is:
--   FAIL HISTORY 2, FAIL LANGUAGE 2, FAIL MATH 1, FAIL SCIENCE 1,
--   PASS HISTORY 3, PASS LANGUAGE 3, PASS MATH 4, PASS SCIENCE 4
SELECT
  CASE WHEN SCORE > 30 THEN 'PASS' ELSE 'FAIL' END AS RESULT
, NAME
, COUNT(*) AS CNT
FROM UNPIVOTED
WHERE SCORE IS NOT NULL
GROUP BY CASE WHEN SCORE > 30 THEN 'PASS' ELSE 'FAIL' END, NAME
ORDER BY RESULT, NAME;
给定一个表,我们称它为 performance6a,其表结构如下:student_id | math | history | language | science
table content - performance6a
我需要编写一个生成如下报告的查询 -
expected query output
分组计数基于两个维度:在本例中,先按是否通过(PASS/FAIL),再按科目。
请注意,这些操作是在无法规范化或重新设计的系统表上执行的。两个维度的取值数量都可能增加(状态维度,即 pass/fail,最多可增加到 100 个;科目维度最多可增加到约 5k 个)。
我试过的:
-- One aggregate SELECT per (status, subject) pair, each reading PERFORMANCE6A
-- separately, stacked into a single result set.
-- UNION ALL instead of UNION: every branch returns exactly one row carrying a
-- unique (STATUS, SUBJECT) literal pair, so duplicate elimination can never
-- remove anything and only adds work.
-- The SUBJECT and CNT aliases are repeated in every branch so the combined
-- result has named columns.
-- COUNT(ID) counts rows with a non-NULL ID that satisfy the branch filter; rows
-- whose score is NULL match neither "> 30" nor "<= 30" and are not counted.
-- NOTE(review): the table description lists student_id, not ID -- confirm the
-- counted column name against the real table.
(SELECT 'PASS' AS STATUS, 'MATH' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE MATH > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'MATH' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE MATH <= 30) UNION ALL
(SELECT 'PASS' AS STATUS, 'HISTORY' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE HISTORY > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'HISTORY' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE HISTORY <= 30) UNION ALL
(SELECT 'PASS' AS STATUS, 'LANGUAGE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE LANGUAGE > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'LANGUAGE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE LANGUAGE <= 30) UNION ALL
(SELECT 'PASS' AS STATUS, 'SCIENCE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE SCIENCE > 30) UNION ALL
(SELECT 'FAIL' AS STATUS, 'SCIENCE' AS SUBJECT, COUNT(ID) AS CNT FROM PERFORMANCE6A WHERE SCIENCE <= 30);
这个查询能给出正确的结果,但执行时间超过 2 秒。我正在寻找优化该查询的思路。
如果先逆透视(unpivot)再聚合,可能会更快:
-- Unpivot the four score columns into (subject, score) rows, then classify and
-- count them in a single aggregation.
-- NULL scores are filtered out before grouping: without the filter the ELSE
-- branch would report a NULL score as 'FAIL', whereas separate "score > 30" /
-- "score <= 30" filters count such a row in neither bucket.
-- A (status, subject) combination with no matching rows produces no output row
-- here (a GROUP BY emits only groups that exist), not a row with a zero count.
select (case when score > 30 then 'PASS' else 'FAIL' end) as status,
       subject,
       count(*) as cnt
from ((select 'math' as subject, math as score from PERFORMANCE6A) union all
      (select 'history' as subject, history as score from PERFORMANCE6A) union all
      (select 'language' as subject, language as score from PERFORMANCE6A) union all
      (select 'science' as subject, science as score from PERFORMANCE6A)
     ) ss
where score is not null
group by (case when score > 30 then 'PASS' else 'FAIL' end), subject;
试试这个:
-- Counts PASS/FAIL per subject by pairing every row with every subject name.
-- PERFORMANCE6A below is inline sample data under the same name as the table;
-- remove that first CTE to run the query against the actual table.
WITH PERFORMANCE6A (STUDENT_ID, MATH, HISTORY, LANGUAGE, SCIENCE) AS
(
VALUES
  (1, 23, 42, 40, 35)
, (2, 45, 21, 25, 36)
, (3, 32, 12, 15, 27)
, (4, 49, 45, 38, 31)
, (5, 50, 43, 37, 41)
)
-- One row per (student row, subject): the simple CASE picks the score column
-- that corresponds to the subject name supplied by the cross join.
, UNPIVOTED (NAME, SCORE) AS
(
SELECT
  D.NAME
, CASE D.NAME
    WHEN 'MATH'     THEN P.MATH
    WHEN 'HISTORY'  THEN P.HISTORY
    WHEN 'LANGUAGE' THEN P.LANGUAGE
    WHEN 'SCIENCE'  THEN P.SCIENCE
  END
FROM PERFORMANCE6A P
CROSS JOIN (VALUES 'MATH', 'HISTORY', 'LANGUAGE', 'SCIENCE') D (NAME)
)
-- The pass threshold is applied in one place, after unpivoting.
-- NULL scores are excluded so they are not reported as 'FAIL' by the ELSE branch.
-- With the sample rows above the result is:
--   FAIL HISTORY 2, FAIL LANGUAGE 2, FAIL MATH 1, FAIL SCIENCE 1,
--   PASS HISTORY 3, PASS LANGUAGE 3, PASS MATH 4, PASS SCIENCE 4
SELECT
  CASE WHEN SCORE > 30 THEN 'PASS' ELSE 'FAIL' END AS RESULT
, NAME
, COUNT(*) AS CNT
FROM UNPIVOTED
WHERE SCORE IS NOT NULL
GROUP BY CASE WHEN SCORE > 30 THEN 'PASS' ELSE 'FAIL' END, NAME
ORDER BY RESULT, NAME;