如何在 PostgreSQL 中聚合具有可变键数的 JSON 数组?
How can I aggregate a JSON array with a variable number of keys in PostgreSQL?
我在 PostgreSQL 的一张表中有一系列行,如下所示:
-[ RECORD 1 ]---------------------------------------------------------------------
student | e04c0ae4709340cb8e03c52f444e723f
group | 1
subgroup | 1
variable | VAR1
status | { "track_A" : "Done", "track_B" : "Done", "track_C" : "To Do" }
-[ RECORD 2 ]---------------------------------------------------------------------
student | e04c0ae4709340cb8e03c52f444e723f
group | 1
subgroup | 1
variable | VAR2
status | { "track_A" : "To Do", "track_B" : "Done", "track_C" : "To Do" }
-[ RECORD 3 ]---------------------------------------------------------------------
student | 849d1e6a0c2b4530a2b550829df94556
group | 0
subgroup | 1
variable | VAR3
status | { "track_A" : "Done", "track_B" : "To Do", "track_C" : "To Do" }
我想按学生、组和子组对它们进行分组,并统计每个轨道各种状态的数量。类似于:
-[ RECORD 1 ]---------------------------------------------------------------------
student | e04c0ae4709340cb8e03c52f444e723f
group | 1
subgroup | 1
totals | { "track_A" : {"done": 1, "to_do": 1}, "track_B" : {"done": 2, "to_do": 0}, "track_C" : {"done": 0, "to_do": 2} }
问题在于轨道的数量可能会变化。我知道它们的名称,但这些名称不是固定的,所以我无法做简单的聚合。对于如何在 PostgreSQL (9.5) 中实现这一点,有什么建议吗?我不想逐个遍历所有轨道再进行聚合,因为那样的操作比较耗时。
您可以使用 json_each_text 将 JSON 中的键值对“展开”(unnest)为多行,
然后使用 json_object_agg
将它们重新组合起来。
数据:
-- Recreate the demo table from scratch so the script can be re-run.
DROP TABLE IF EXISTS tab;

-- "status" stores a JSON object whose keys are track names and whose values
-- are the state of that track (the sample rows use 'Done' and 'To Do').
-- "group" is a reserved word, so it has to stay double-quoted everywhere.
CREATE TABLE tab (
    student  VARCHAR(36),
    "group"  INTEGER,
    subgroup INTEGER,
    variable VARCHAR(20),
    status   JSON
);

-- Sample data: two variables for the first student, one for the second.
INSERT INTO tab (student, "group", subgroup, variable, status)
VALUES
    ('e04c0ae4709340cb8e03c52f444e723f', 1, 1, 'VAR1',
     '{ "track_A" : "Done", "track_B" : "Done", "track_C" : "To Do" }'),
    ('e04c0ae4709340cb8e03c52f444e723f', 1, 1, 'VAR2',
     '{ "track_A" : "To Do", "track_B" : "Done", "track_C" : "To Do" }'),
    ('849d1e6a0c2b4530a2b550829df94556', 0, 1, 'VAR3',
     '{ "track_A" : "Done", "track_B" : "To Do", "track_C" : "To Do" }');
查询:
-- Count the 'Done' / 'To Do' states of every track per (student, "group", subgroup)
-- and return them as a single JSON object per group, shaped like:
--   { "track_A" : {"done" : 1, "to_do" : 1}, "track_B" : {...}, ... }
-- The track names are never hard-coded: the JSON object is unnested into rows
-- first, so any number of track keys is handled.
WITH track_counts AS (
    -- Explode each status object into (track, state) rows, then count the
    -- states per track within each student / group / subgroup.
    SELECT
        t.student,
        t."group",
        t.subgroup,
        s.track,
        COUNT(*) FILTER (WHERE s.state = 'Done')  AS done,
        COUNT(*) FILTER (WHERE s.state = 'To Do') AS to_do
    FROM tab AS t
    CROSS JOIN LATERAL json_each_text(t.status) AS s(track, state)
    GROUP BY t.student, t."group", t.subgroup, s.track
)
SELECT
    student,
    "group",
    subgroup,
    -- Inner keys are lower snake_case ("done", "to_do") to match the requested
    -- output; json_build_object avoids a second aggregation pass per track.
    json_object_agg(
        track,
        json_build_object('done', done, 'to_do', to_do)
    ) AS totals
FROM track_counts
GROUP BY student, "group", subgroup
-- Deterministic row order for display and comparison.
ORDER BY student, "group", subgroup;
输出:
我在 PostgreSQL 的一张表中有一系列行,如下所示:
-[ RECORD 1 ]---------------------------------------------------------------------
student | e04c0ae4709340cb8e03c52f444e723f
group | 1
subgroup | 1
variable | VAR1
status | { "track_A" : "Done", "track_B" : "Done", "track_C" : "To Do" }
-[ RECORD 2 ]---------------------------------------------------------------------
student | e04c0ae4709340cb8e03c52f444e723f
group | 1
subgroup | 1
variable | VAR2
status | { "track_A" : "To Do", "track_B" : "Done", "track_C" : "To Do" }
-[ RECORD 3 ]---------------------------------------------------------------------
student | 849d1e6a0c2b4530a2b550829df94556
group | 0
subgroup | 1
variable | VAR3
status | { "track_A" : "Done", "track_B" : "To Do", "track_C" : "To Do" }
我想按学生、组和子组对它们进行分组,并统计每个轨道各种状态的数量。类似于:
-[ RECORD 1 ]---------------------------------------------------------------------
student | e04c0ae4709340cb8e03c52f444e723f
group | 1
subgroup | 1
totals | { "track_A" : {"done": 1, "to_do": 1}, "track_B" : {"done": 2, "to_do": 0}, "track_C" : {"done": 0, "to_do": 2} }
问题在于轨道的数量可能会变化。我知道它们的名称,但这些名称不是固定的,所以我无法做简单的聚合。对于如何在 PostgreSQL (9.5) 中实现这一点,有什么建议吗?我不想逐个遍历所有轨道再进行聚合,因为那样的操作比较耗时。
您可以使用 json_each_text 将 JSON 中的键值对“展开”(unnest)为多行,
然后使用 json_object_agg
将它们重新组合起来。
数据:
-- Recreate the demo table from scratch so the script can be re-run.
DROP TABLE IF EXISTS tab;

-- "status" stores a JSON object whose keys are track names and whose values
-- are the state of that track (the sample rows use 'Done' and 'To Do').
-- "group" is a reserved word, so it has to stay double-quoted everywhere.
CREATE TABLE tab (
    student  VARCHAR(36),
    "group"  INTEGER,
    subgroup INTEGER,
    variable VARCHAR(20),
    status   JSON
);

-- Sample data: two variables for the first student, one for the second.
INSERT INTO tab (student, "group", subgroup, variable, status)
VALUES
    ('e04c0ae4709340cb8e03c52f444e723f', 1, 1, 'VAR1',
     '{ "track_A" : "Done", "track_B" : "Done", "track_C" : "To Do" }'),
    ('e04c0ae4709340cb8e03c52f444e723f', 1, 1, 'VAR2',
     '{ "track_A" : "To Do", "track_B" : "Done", "track_C" : "To Do" }'),
    ('849d1e6a0c2b4530a2b550829df94556', 0, 1, 'VAR3',
     '{ "track_A" : "Done", "track_B" : "To Do", "track_C" : "To Do" }');
查询:
-- Count the 'Done' / 'To Do' states of every track per (student, "group", subgroup)
-- and return them as a single JSON object per group, shaped like:
--   { "track_A" : {"done" : 1, "to_do" : 1}, "track_B" : {...}, ... }
-- The track names are never hard-coded: the JSON object is unnested into rows
-- first, so any number of track keys is handled.
WITH track_counts AS (
    -- Explode each status object into (track, state) rows, then count the
    -- states per track within each student / group / subgroup.
    SELECT
        t.student,
        t."group",
        t.subgroup,
        s.track,
        COUNT(*) FILTER (WHERE s.state = 'Done')  AS done,
        COUNT(*) FILTER (WHERE s.state = 'To Do') AS to_do
    FROM tab AS t
    CROSS JOIN LATERAL json_each_text(t.status) AS s(track, state)
    GROUP BY t.student, t."group", t.subgroup, s.track
)
SELECT
    student,
    "group",
    subgroup,
    -- Inner keys are lower snake_case ("done", "to_do") to match the requested
    -- output; json_build_object avoids a second aggregation pass per track.
    json_object_agg(
        track,
        json_build_object('done', done, 'to_do', to_do)
    ) AS totals
FROM track_counts
GROUP BY student, "group", subgroup
-- Deterministic row order for display and comparison.
ORDER BY student, "group", subgroup;
输出: