PostgreSQL 9.3:在 Pivot table 查询中过滤
PostgreSQL 9.3: Filter in Pivot table query
嗯,我有这个记录:
Employee_Number Employee_role Group_Name
----------------------------------------------------
EMP101 C# Developer Group_1
EMP102 ASP Developer Group_1
EMP103 SQL Developer Group_2
EMP104 PLSQL Developer Group_2
EMP101 Java Developer
EMP102 Web Developer
EMP101 DBA
EMP105 DBA
EMP106 SQL Developer Group_3
EMP107 Oracle Developer Group_3
EMP101 Oracle Developer Group_3
我想按以下格式显示上述记录的数据透视表(pivot table):
Employee_Number  TotalRoles  TotalGroups  Available  Others  Group_1  Group_2  Group_3
--------------------------------------------------------------------------------------
EMP101           4           3            2          2       1                 1
EMP102           2           3            1          1       1
EMP103           1           3            1          0                1
EMP104           1           3            1          0                1
EMP105           1           3            0          1
EMP106           1           3            1          0                         1
EMP107           1           3            1          0                         1
对于上述结果,我使用以下脚本:
-- Pivot of the employee table: one output row per employee_number.
-- The first crosstab() argument (a query passed as a dollar-quoted string) builds,
-- per employee:
--   total_roles  = count of employee_role values
--   total_groups = number of distinct non-empty group_name values in the whole table
--   available    = rows whose group_name is not ''
--   others       = rows whose group_name is ''
-- and LEFT JOINs the employee's named groups (group_name <> ''), so an employee
-- without any named group still yields one row (group_name NULL, val 0).
-- crosstab(text, text) is provided by the additional module tablefunc.
SELECT * FROM crosstab(
$$SELECT grp.*, e.group_name
, CASE WHEN e.employee_number IS NULL THEN 0 ELSE 1 END AS val
FROM (
SELECT employee_number
, count(employee_role)::int AS total_roles
, (SELECT count(DISTINCT group_name)::int
FROM employee
WHERE group_name <> '') AS total_groups
, count(group_name <> '' OR NULL)::INT AS available
, count(group_name = '' OR NULL)::int AS others
FROM employee
GROUP BY employee_number
) grp
LEFT JOIN employee e ON e.employee_number = grp.employee_number
AND e.group_name <> ''
ORDER BY grp.employee_number, e.group_name$$
-- Second argument: the category list; each value becomes one group column below.
,$$VALUES ('Group_1'),('Group_2'),('Group_3')$$
-- Column definition list: row name, the four count columns from grp,
-- then one column per category in the same order as the VALUES list.
) AS ct (employee_number text
, total_roles int
, total_groups int
, available int
, others int
, "Group_1" int
, "Group_2" int
, "Group_3" int);
但是,现在我想按 Group_Name 过滤后再显示上述记录的数据透视表。
也就是说,如果我只想为 Group_Name = Group_3 显示数据透视表,
那么它应当只显示仅属于 Group_Name = Group_3 的员工。
如果我只想查看属于 Group_3
的员工,它必须向我显示:
Employee_Number total_roles total_groups available others Group_3
-------------------------------------------------------------------------------
EMP106 1 3 1 0 1
EMP107 1 3 1 0 1
注意:正如您在第一个表中看到的,员工 EMP106 和 EMP107 只属于 Group_Name = Group_3。
员工 EMP101 也属于该组,但他同时还属于其他组,所以不应该出现在这个表中。
如何排除违规行:
- Select rows which are not present in other table
已修改 crosstab()
查询:
-- Pivot restricted to employees that belong to 'Group_3' and to no other group.
-- Compared with the unfiltered pivot, the source query:
--   * uses an inner JOIN, so only employees with a 'Group_3' row remain;
--   * adds NOT EXISTS to drop employees that also have a row with a different
--     group_name (the empty name '' counts as different);
--   * lists only 'Group_3' as category, hence a single group column.
-- Fixes: removed stray <b>/</b> HTML markup from the SQL text and restored the
-- missing "<>" operator in the NOT EXISTS predicate.
-- crosstab(text, text) is provided by the additional module tablefunc.
SELECT * FROM crosstab(
$$SELECT grp.*, e.group_name
, CASE WHEN e.employee_number IS NULL THEN 0 ELSE 1 END AS val
FROM (
   SELECT employee_number
        , count(employee_role)::int AS total_roles
        , (SELECT count(DISTINCT group_name)::int
           FROM   employee
           WHERE  group_name <> '') AS total_groups
        , count(group_name <> '' OR NULL)::int AS available
        , count(group_name = '' OR NULL)::int AS others
   FROM   employee
   GROUP  BY employee_number
   ) grp
JOIN   employee e USING (employee_number)
WHERE  e.group_name = 'Group_3'
AND    NOT EXISTS (
   SELECT 1 FROM employee
   WHERE  employee_number = e.employee_number
   AND    group_name <> e.group_name
   )
ORDER  BY employee_number$$
-- Category list: only the group being filtered on.
,$$VALUES ('Group_3')$$
-- Column definition list: row name, the four count columns, one group column.
) AS ct (employee_number text
, total_roles int
, total_groups int
, available int
, others int
, "Group_3" int);
但是如您所见,我们根本不需要 crosstab()
。简化为:
-- Employees that belong to 'Group_3' and to no other group, with their counts.
-- Group membership is tested with EXISTS / NOT EXISTS instead of joining the
-- employee table, so an employee holding several roles in 'Group_3' is
-- returned once rather than once per matching row.
SELECT grp.*
     , 1 AS "Group_3"  -- constant: every row returned here is a Group_3 member
FROM  (
   SELECT employee_number
        , count(employee_role)::int AS total_roles
        , (SELECT count(DISTINCT group_name)::int
           FROM   employee
           WHERE  group_name <> '') AS total_groups  -- table-wide, same on every row
        , count(group_name <> '' OR NULL)::int AS available
        , count(group_name = '' OR NULL)::int AS others
   FROM   employee
   GROUP  BY employee_number
   ) grp
WHERE  EXISTS (
   -- at least one row in the requested group
   SELECT 1 FROM employee e
   WHERE  e.employee_number = grp.employee_number
   AND    e.group_name = 'Group_3'
   )
AND    NOT EXISTS (
   -- no row in any other group; the empty name '' counts as another group
   SELECT 1 FROM employee x
   WHERE  x.employee_number = grp.employee_number
   AND    x.group_name <> 'Group_3'
   )
ORDER  BY grp.employee_number;
列 "Group_3"
在这里实际上只是噪音,因为根据定义它总是 1
。
如果仅以这种方式选择了一小部分行,则带有 LATERAL
连接的此版本应该快得多:
-- Same result as the simplified query, but the qualifying employees are
-- selected first and the per-employee counts are computed only for them
-- in a LATERAL subquery.
SELECT e.employee_number
     , grp.total_roles
     , total.total_groups
     , grp.available
     , grp.others
     , 1 AS "Group_3"  -- constant: every row returned here is a Group_3 member
FROM  (
   -- DISTINCT: an employee with several rows in 'Group_3' must drive the
   -- LATERAL subquery once, otherwise the result row would be repeated.
   SELECT DISTINCT employee_number
   FROM   employee e
   WHERE  group_name = 'Group_3'
   AND    NOT EXISTS (
      -- no row in any other group; the empty name '' counts as another group
      SELECT 1 FROM employee
      WHERE  employee_number = e.employee_number
      AND    group_name <> e.group_name
      )
   ) e
CROSS  JOIN LATERAL (
   -- counts for the current employee only
   SELECT count(employee_role)::int AS total_roles
        , count(group_name <> '' OR NULL)::int AS available
        , count(group_name = '' OR NULL)::int AS others
   FROM   employee
   WHERE  employee_number = e.employee_number
   GROUP  BY employee_number
   ) grp
CROSS  JOIN (
   -- table-wide number of distinct non-empty group names (single row)
   SELECT count(DISTINCT group_name)::int AS total_groups
   FROM   employee
   WHERE  group_name <> ''
   ) total
ORDER  BY e.employee_number;
LATERAL
解决方案和性能的详细信息:
- Optimize GROUP BY query to retrieve latest row per user
适用于任意一组分组的简单通用解决方案
未针对性能进行优化,但易于调整:
<original crosstab query from your question>
-- Filter appended to the pivot query: keep only rows flagged for "Group_3"
-- whose other group columns are NULL.
WHERE "Group_3" = 1
AND "Group_1" IS NULL
AND "Group_2" IS NULL
-- NOTE(review): the crosstab column list in the question defines only
-- "Group_1" .. "Group_3"; the next line is valid only if a "Group_4"
-- column is added there - confirm before use.
AND "Group_4" IS NULL
AND others = 0 -- to rule out membership in the "empty" group
-- possibly more ...
嗯,我有这个记录:
Employee_Number Employee_role Group_Name
----------------------------------------------------
EMP101 C# Developer Group_1
EMP102 ASP Developer Group_1
EMP103 SQL Developer Group_2
EMP104 PLSQL Developer Group_2
EMP101 Java Developer
EMP102 Web Developer
EMP101 DBA
EMP105 DBA
EMP106 SQL Developer Group_3
EMP107 Oracle Developer Group_3
EMP101 Oracle Developer Group_3
我想按以下格式显示上述记录的数据透视表(pivot table):
Employee_Number  TotalRoles  TotalGroups  Available  Others  Group_1  Group_2  Group_3
--------------------------------------------------------------------------------------
EMP101           4           3            2          2       1                 1
EMP102           2           3            1          1       1
EMP103           1           3            1          0                1
EMP104           1           3            1          0                1
EMP105           1           3            0          1
EMP106           1           3            1          0                         1
EMP107           1           3            1          0                         1
对于上述结果,我使用以下脚本:
-- Pivot of the employee table: one output row per employee_number.
-- The first crosstab() argument (a query passed as a dollar-quoted string) builds,
-- per employee:
--   total_roles  = count of employee_role values
--   total_groups = number of distinct non-empty group_name values in the whole table
--   available    = rows whose group_name is not ''
--   others       = rows whose group_name is ''
-- and LEFT JOINs the employee's named groups (group_name <> ''), so an employee
-- without any named group still yields one row (group_name NULL, val 0).
-- crosstab(text, text) is provided by the additional module tablefunc.
SELECT * FROM crosstab(
$$SELECT grp.*, e.group_name
, CASE WHEN e.employee_number IS NULL THEN 0 ELSE 1 END AS val
FROM (
SELECT employee_number
, count(employee_role)::int AS total_roles
, (SELECT count(DISTINCT group_name)::int
FROM employee
WHERE group_name <> '') AS total_groups
, count(group_name <> '' OR NULL)::INT AS available
, count(group_name = '' OR NULL)::int AS others
FROM employee
GROUP BY employee_number
) grp
LEFT JOIN employee e ON e.employee_number = grp.employee_number
AND e.group_name <> ''
ORDER BY grp.employee_number, e.group_name$$
-- Second argument: the category list; each value becomes one group column below.
,$$VALUES ('Group_1'),('Group_2'),('Group_3')$$
-- Column definition list: row name, the four count columns from grp,
-- then one column per category in the same order as the VALUES list.
) AS ct (employee_number text
, total_roles int
, total_groups int
, available int
, others int
, "Group_1" int
, "Group_2" int
, "Group_3" int);
但是,现在我想按 Group_Name 过滤后再显示上述记录的数据透视表。
也就是说,如果我只想为 Group_Name = Group_3 显示数据透视表,
那么它应当只显示仅属于 Group_Name = Group_3 的员工。
如果我只想查看属于 Group_3
的员工,它必须向我显示:
Employee_Number total_roles total_groups available others Group_3
-------------------------------------------------------------------------------
EMP106 1 3 1 0 1
EMP107 1 3 1 0 1
注意:正如您在第一个表中看到的,员工 EMP106 和 EMP107 只属于 Group_Name = Group_3。
员工 EMP101 也属于该组,但他同时还属于其他组,所以不应该出现在这个表中。
如何排除违规行:
- Select rows which are not present in other table
已修改 crosstab()
查询:
-- Pivot restricted to employees that belong to 'Group_3' and to no other group.
-- Compared with the unfiltered pivot, the source query:
--   * uses an inner JOIN, so only employees with a 'Group_3' row remain;
--   * adds NOT EXISTS to drop employees that also have a row with a different
--     group_name (the empty name '' counts as different);
--   * lists only 'Group_3' as category, hence a single group column.
-- Fixes: removed stray <b>/</b> HTML markup from the SQL text and restored the
-- missing "<>" operator in the NOT EXISTS predicate.
-- crosstab(text, text) is provided by the additional module tablefunc.
SELECT * FROM crosstab(
$$SELECT grp.*, e.group_name
, CASE WHEN e.employee_number IS NULL THEN 0 ELSE 1 END AS val
FROM (
   SELECT employee_number
        , count(employee_role)::int AS total_roles
        , (SELECT count(DISTINCT group_name)::int
           FROM   employee
           WHERE  group_name <> '') AS total_groups
        , count(group_name <> '' OR NULL)::int AS available
        , count(group_name = '' OR NULL)::int AS others
   FROM   employee
   GROUP  BY employee_number
   ) grp
JOIN   employee e USING (employee_number)
WHERE  e.group_name = 'Group_3'
AND    NOT EXISTS (
   SELECT 1 FROM employee
   WHERE  employee_number = e.employee_number
   AND    group_name <> e.group_name
   )
ORDER  BY employee_number$$
-- Category list: only the group being filtered on.
,$$VALUES ('Group_3')$$
-- Column definition list: row name, the four count columns, one group column.
) AS ct (employee_number text
, total_roles int
, total_groups int
, available int
, others int
, "Group_3" int);
但是如您所见,我们根本不需要 crosstab()
。简化为:
-- Employees that belong to 'Group_3' and to no other group, with their counts.
-- Group membership is tested with EXISTS / NOT EXISTS instead of joining the
-- employee table, so an employee holding several roles in 'Group_3' is
-- returned once rather than once per matching row.
SELECT grp.*
     , 1 AS "Group_3"  -- constant: every row returned here is a Group_3 member
FROM  (
   SELECT employee_number
        , count(employee_role)::int AS total_roles
        , (SELECT count(DISTINCT group_name)::int
           FROM   employee
           WHERE  group_name <> '') AS total_groups  -- table-wide, same on every row
        , count(group_name <> '' OR NULL)::int AS available
        , count(group_name = '' OR NULL)::int AS others
   FROM   employee
   GROUP  BY employee_number
   ) grp
WHERE  EXISTS (
   -- at least one row in the requested group
   SELECT 1 FROM employee e
   WHERE  e.employee_number = grp.employee_number
   AND    e.group_name = 'Group_3'
   )
AND    NOT EXISTS (
   -- no row in any other group; the empty name '' counts as another group
   SELECT 1 FROM employee x
   WHERE  x.employee_number = grp.employee_number
   AND    x.group_name <> 'Group_3'
   )
ORDER  BY grp.employee_number;
列 "Group_3"
在这里实际上只是噪音,因为根据定义它总是 1
。
如果仅以这种方式选择了一小部分行,则带有 LATERAL
连接的此版本应该快得多:
-- Same result as the simplified query, but the qualifying employees are
-- selected first and the per-employee counts are computed only for them
-- in a LATERAL subquery.
SELECT e.employee_number
     , grp.total_roles
     , total.total_groups
     , grp.available
     , grp.others
     , 1 AS "Group_3"  -- constant: every row returned here is a Group_3 member
FROM  (
   -- DISTINCT: an employee with several rows in 'Group_3' must drive the
   -- LATERAL subquery once, otherwise the result row would be repeated.
   SELECT DISTINCT employee_number
   FROM   employee e
   WHERE  group_name = 'Group_3'
   AND    NOT EXISTS (
      -- no row in any other group; the empty name '' counts as another group
      SELECT 1 FROM employee
      WHERE  employee_number = e.employee_number
      AND    group_name <> e.group_name
      )
   ) e
CROSS  JOIN LATERAL (
   -- counts for the current employee only
   SELECT count(employee_role)::int AS total_roles
        , count(group_name <> '' OR NULL)::int AS available
        , count(group_name = '' OR NULL)::int AS others
   FROM   employee
   WHERE  employee_number = e.employee_number
   GROUP  BY employee_number
   ) grp
CROSS  JOIN (
   -- table-wide number of distinct non-empty group names (single row)
   SELECT count(DISTINCT group_name)::int AS total_groups
   FROM   employee
   WHERE  group_name <> ''
   ) total
ORDER  BY e.employee_number;
LATERAL
解决方案和性能的详细信息:
- Optimize GROUP BY query to retrieve latest row per user
适用于任意一组分组的简单通用解决方案
未针对性能进行优化,但易于调整:
<original crosstab query from your question>
-- Filter appended to the pivot query: keep only rows flagged for "Group_3"
-- whose other group columns are NULL.
WHERE "Group_3" = 1
AND "Group_1" IS NULL
AND "Group_2" IS NULL
-- NOTE(review): the crosstab column list in the question defines only
-- "Group_1" .. "Group_3"; the next line is valid only if a "Group_4"
-- column is added there - confirm before use.
AND "Group_4" IS NULL
AND others = 0 -- to rule out membership in the "empty" group
-- possibly more ...